From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 21:33:14 +0200
Subject: Adding upstream version 115.7.0esr.

Signed-off-by: Daniel Baumann
---
 .../libwebrtc/modules/audio_processing/BUILD.gn | 677 ++++ .../libwebrtc/modules/audio_processing/DEPS | 14 + .../libwebrtc/modules/audio_processing/OWNERS | 8 + .../modules/audio_processing/aec3/BUILD.gn | 384 +++ .../audio_processing/aec3/adaptive_fir_filter.cc | 744 +++++ .../audio_processing/aec3/adaptive_fir_filter.h | 192 ++ .../aec3/adaptive_fir_filter_avx2.cc | 188 ++ .../aec3/adaptive_fir_filter_erl.cc | 102 + .../aec3/adaptive_fir_filter_erl.h | 54 + .../aec3/adaptive_fir_filter_erl_avx2.cc | 37 + .../aec3/adaptive_fir_filter_erl_gn/moz.build | 205 ++ .../aec3/adaptive_fir_filter_erl_unittest.cc | 106 + .../aec3/adaptive_fir_filter_gn/moz.build | 216 ++ .../aec3/adaptive_fir_filter_unittest.cc | 594 ++++ .../audio_processing/aec3/aec3_avx2_gn/moz.build | 190 ++ .../modules/audio_processing/aec3/aec3_common.cc | 58 + .../modules/audio_processing/aec3/aec3_common.h | 114 + .../audio_processing/aec3/aec3_common_gn/moz.build | 201 ++ .../modules/audio_processing/aec3/aec3_fft.cc | 144 + .../modules/audio_processing/aec3/aec3_fft.h | 75 + .../audio_processing/aec3/aec3_fft_gn/moz.build | 216 ++ .../audio_processing/aec3/aec3_fft_unittest.cc | 213 ++ .../audio_processing/aec3/aec3_gn/moz.build | 289 ++ .../modules/audio_processing/aec3/aec_state.cc | 481 +++ .../modules/audio_processing/aec3/aec_state.h | 300 ++ .../audio_processing/aec3/aec_state_unittest.cc | 297 ++ .../audio_processing/aec3/alignment_mixer.cc | 163 + .../audio_processing/aec3/alignment_mixer.h | 57 + .../aec3/alignment_mixer_unittest.cc | 196 ++ .../aec3/api_call_jitter_metrics.cc | 121 + .../aec3/api_call_jitter_metrics.h | 60 + .../aec3/api_call_jitter_metrics_unittest.cc | 109 + .../modules/audio_processing/aec3/block.h | 91 + .../modules/audio_processing/aec3/block_buffer.cc | 23 + .../modules/audio_processing/aec3/block_buffer.h | 60 + .../audio_processing/aec3/block_delay_buffer.cc | 69 + .../audio_processing/aec3/block_delay_buffer.h | 43 + .../aec3/block_delay_buffer_unittest.cc | 105 + .../modules/audio_processing/aec3/block_framer.cc | 83 + .../modules/audio_processing/aec3/block_framer.h | 49 + .../audio_processing/aec3/block_framer_unittest.cc | 337 ++ .../audio_processing/aec3/block_processor.cc | 290 ++ .../audio_processing/aec3/block_processor.h | 81 + .../aec3/block_processor_metrics.cc | 104 + .../aec3/block_processor_metrics.h | 46 + .../aec3/block_processor_metrics_unittest.cc | 34 + .../aec3/block_processor_unittest.cc | 341 ++ .../audio_processing/aec3/clockdrift_detector.cc | 61 + .../audio_processing/aec3/clockdrift_detector.h | 40 + .../aec3/clockdrift_detector_unittest.cc | 57 + .../aec3/coarse_filter_update_gain.cc | 103 + .../aec3/coarse_filter_update_gain.h | 74 + .../aec3/coarse_filter_update_gain_unittest.cc | 268 ++ .../aec3/comfort_noise_generator.cc | 186 ++ .../aec3/comfort_noise_generator.h | 77 + .../aec3/comfort_noise_generator_unittest.cc | 72 + .../audio_processing/aec3/config_selector.cc | 71 + .../audio_processing/aec3/config_selector.h | 41 + .../aec3/config_selector_unittest.cc | 116 + .../modules/audio_processing/aec3/decimator.cc | 91 + .../modules/audio_processing/aec3/decimator.h | 41 + .../audio_processing/aec3/decimator_unittest.cc | 135 + .../modules/audio_processing/aec3/delay_estimate.h | 33 + .../aec3/dominant_nearend_detector.cc | 75 +
.../aec3/dominant_nearend_detector.h | 56 + .../aec3/downsampled_render_buffer.cc | 25 + .../aec3/downsampled_render_buffer.h | 58 + .../audio_processing/aec3/echo_audibility.cc | 119 + .../audio_processing/aec3/echo_audibility.h | 85 + .../audio_processing/aec3/echo_canceller3.cc | 992 ++++++ .../audio_processing/aec3/echo_canceller3.h | 230 ++ .../aec3/echo_canceller3_unittest.cc | 1160 +++++++ .../aec3/echo_path_delay_estimator.cc | 127 + .../aec3/echo_path_delay_estimator.h | 80 + .../aec3/echo_path_delay_estimator_unittest.cc | 184 ++ .../audio_processing/aec3/echo_path_variability.cc | 22 + .../audio_processing/aec3/echo_path_variability.h | 37 + .../aec3/echo_path_variability_unittest.cc | 50 + .../modules/audio_processing/aec3/echo_remover.cc | 521 +++ .../modules/audio_processing/aec3/echo_remover.h | 62 + .../audio_processing/aec3/echo_remover_metrics.cc | 157 + .../audio_processing/aec3/echo_remover_metrics.h | 78 + .../aec3/echo_remover_metrics_unittest.cc | 156 + .../audio_processing/aec3/echo_remover_unittest.cc | 210 ++ .../modules/audio_processing/aec3/erl_estimator.cc | 146 + .../modules/audio_processing/aec3/erl_estimator.h | 58 + .../aec3/erl_estimator_unittest.cc | 104 + .../audio_processing/aec3/erle_estimator.cc | 89 + .../modules/audio_processing/aec3/erle_estimator.h | 112 + .../aec3/erle_estimator_unittest.cc | 288 ++ .../modules/audio_processing/aec3/fft_buffer.cc | 27 + .../modules/audio_processing/aec3/fft_buffer.h | 60 + .../modules/audio_processing/aec3/fft_data.h | 104 + .../modules/audio_processing/aec3/fft_data_avx2.cc | 33 + .../audio_processing/aec3/fft_data_gn/moz.build | 205 ++ .../audio_processing/aec3/fft_data_unittest.cc | 186 ++ .../audio_processing/aec3/filter_analyzer.cc | 289 ++ .../audio_processing/aec3/filter_analyzer.h | 150 + .../aec3/filter_analyzer_unittest.cc | 33 + .../modules/audio_processing/aec3/frame_blocker.cc | 80 + .../modules/audio_processing/aec3/frame_blocker.h | 51 + .../aec3/frame_blocker_unittest.cc | 425 +++ .../aec3/fullband_erle_estimator.cc | 191 ++ .../aec3/fullband_erle_estimator.h | 118 + .../audio_processing/aec3/matched_filter.cc | 900 +++++ .../modules/audio_processing/aec3/matched_filter.h | 190 ++ .../audio_processing/aec3/matched_filter_avx2.cc | 261 ++ .../aec3/matched_filter_gn/moz.build | 205 ++ .../aec3/matched_filter_lag_aggregator.cc | 166 + .../aec3/matched_filter_lag_aggregator.h | 97 + .../aec3/matched_filter_lag_aggregator_unittest.cc | 113 + .../aec3/matched_filter_unittest.cc | 612 ++++ .../aec3/mock/mock_block_processor.cc | 20 + .../aec3/mock/mock_block_processor.h | 53 + .../aec3/mock/mock_echo_remover.cc | 20 + .../audio_processing/aec3/mock/mock_echo_remover.h | 56 + .../aec3/mock/mock_render_delay_buffer.cc | 36 + .../aec3/mock/mock_render_delay_buffer.h | 67 + .../aec3/mock/mock_render_delay_controller.cc | 20 + .../aec3/mock/mock_render_delay_controller.h | 42 + .../audio_processing/aec3/moving_average.cc | 60 + .../modules/audio_processing/aec3/moving_average.h | 45 + .../aec3/moving_average_unittest.cc | 89 + .../aec3/multi_channel_content_detector.cc | 148 + .../aec3/multi_channel_content_detector.h | 96 + .../multi_channel_content_detector_unittest.cc | 470 +++ .../audio_processing/aec3/nearend_detector.h | 42 + .../aec3/refined_filter_update_gain.cc | 173 + .../aec3/refined_filter_update_gain.h | 91 + .../aec3/refined_filter_update_gain_unittest.cc | 392 +++ .../modules/audio_processing/aec3/render_buffer.cc | 81 + .../modules/audio_processing/aec3/render_buffer.h | 115 + 
.../aec3/render_buffer_gn/moz.build | 205 ++ .../aec3/render_buffer_unittest.cc | 46 + .../audio_processing/aec3/render_delay_buffer.cc | 519 +++ .../audio_processing/aec3/render_delay_buffer.h | 86 + .../aec3/render_delay_buffer_unittest.cc | 130 + .../aec3/render_delay_controller.cc | 186 ++ .../aec3/render_delay_controller.h | 51 + .../aec3/render_delay_controller_metrics.cc | 132 + .../aec3/render_delay_controller_metrics.h | 49 + .../render_delay_controller_metrics_unittest.cc | 72 + .../aec3/render_delay_controller_unittest.cc | 334 ++ .../aec3/render_signal_analyzer.cc | 156 + .../audio_processing/aec3/render_signal_analyzer.h | 62 + .../aec3/render_signal_analyzer_unittest.cc | 171 + .../aec3/residual_echo_estimator.cc | 379 +++ .../aec3/residual_echo_estimator.h | 85 + .../aec3/residual_echo_estimator_unittest.cc | 199 ++ .../aec3/reverb_decay_estimator.cc | 410 +++ .../audio_processing/aec3/reverb_decay_estimator.h | 120 + .../aec3/reverb_frequency_response.cc | 108 + .../aec3/reverb_frequency_response.h | 55 + .../modules/audio_processing/aec3/reverb_model.cc | 59 + .../modules/audio_processing/aec3/reverb_model.h | 58 + .../aec3/reverb_model_estimator.cc | 57 + .../audio_processing/aec3/reverb_model_estimator.h | 72 + .../aec3/reverb_model_estimator_unittest.cc | 157 + .../aec3/signal_dependent_erle_estimator.cc | 416 +++ .../aec3/signal_dependent_erle_estimator.h | 104 + .../signal_dependent_erle_estimator_unittest.cc | 208 ++ .../audio_processing/aec3/spectrum_buffer.cc | 30 + .../audio_processing/aec3/spectrum_buffer.h | 62 + .../aec3/stationarity_estimator.cc | 241 ++ .../audio_processing/aec3/stationarity_estimator.h | 123 + .../aec3/subband_erle_estimator.cc | 251 ++ .../audio_processing/aec3/subband_erle_estimator.h | 106 + .../aec3/subband_nearend_detector.cc | 70 + .../aec3/subband_nearend_detector.h | 52 + .../modules/audio_processing/aec3/subtractor.cc | 364 +++ .../modules/audio_processing/aec3/subtractor.h | 150 + .../audio_processing/aec3/subtractor_output.cc | 58 + .../audio_processing/aec3/subtractor_output.h | 52 + .../aec3/subtractor_output_analyzer.cc | 64 + .../aec3/subtractor_output_analyzer.h | 45 + .../audio_processing/aec3/subtractor_unittest.cc | 320 ++ .../audio_processing/aec3/suppression_filter.cc | 180 + .../audio_processing/aec3/suppression_filter.h | 51 + .../aec3/suppression_filter_unittest.cc | 257 ++ .../audio_processing/aec3/suppression_gain.cc | 465 +++ .../audio_processing/aec3/suppression_gain.h | 145 + .../aec3/suppression_gain_unittest.cc | 149 + .../audio_processing/aec3/transparent_mode.cc | 243 ++ .../audio_processing/aec3/transparent_mode.h | 47 + .../modules/audio_processing/aec3/vector_math.h | 229 ++ .../audio_processing/aec3/vector_math_avx2.cc | 82 + .../audio_processing/aec3/vector_math_gn/moz.build | 205 ++ .../audio_processing/aec3/vector_math_unittest.cc | 209 ++ .../modules/audio_processing/aec_dump/BUILD.gn | 112 + .../audio_processing/aec_dump/aec_dump_factory.h | 48 + .../aec_dump/aec_dump_gn/moz.build | 209 ++ .../audio_processing/aec_dump/aec_dump_impl.cc | 281 ++ .../audio_processing/aec_dump/aec_dump_impl.h | 85 + .../aec_dump/aec_dump_integration_test.cc | 93 + .../audio_processing/aec_dump/aec_dump_unittest.cc | 87 + .../aec_dump/capture_stream_info.cc | 61 + .../aec_dump/capture_stream_info.h | 66 + .../audio_processing/aec_dump/mock_aec_dump.cc | 19 + .../audio_processing/aec_dump/mock_aec_dump.h | 82 + .../aec_dump/null_aec_dump_factory.cc | 34 + .../aec_dump/null_aec_dump_factory_gn/moz.build | 225 ++ 
.../aec_dump_interface_gn/moz.build | 225 ++ .../modules/audio_processing/aecm/BUILD.gn | 44 + .../modules/audio_processing/aecm/aecm_core.cc | 1125 +++++++ .../modules/audio_processing/aecm/aecm_core.h | 441 +++ .../modules/audio_processing/aecm/aecm_core_c.cc | 671 ++++ .../audio_processing/aecm/aecm_core_gn/moz.build | 293 ++ .../audio_processing/aecm/aecm_core_mips.cc | 1656 ++++++++++ .../audio_processing/aecm/aecm_core_neon.cc | 206 ++ .../modules/audio_processing/aecm/aecm_defines.h | 87 + .../audio_processing/aecm/echo_control_mobile.cc | 599 ++++ .../audio_processing/aecm/echo_control_mobile.h | 209 ++ .../modules/audio_processing/agc/BUILD.gn | 126 + .../libwebrtc/modules/audio_processing/agc/agc.cc | 98 + .../libwebrtc/modules/audio_processing/agc/agc.h | 52 + .../modules/audio_processing/agc/agc_gn/moz.build | 233 ++ .../audio_processing/agc/agc_manager_direct.cc | 713 ++++ .../audio_processing/agc/agc_manager_direct.h | 278 ++ .../agc/agc_manager_direct_unittest.cc | 2184 +++++++++++++ .../modules/audio_processing/agc/gain_control.h | 105 + .../agc/gain_control_interface_gn/moz.build | 201 ++ .../audio_processing/agc/legacy/analog_agc.cc | 1238 +++++++ .../audio_processing/agc/legacy/analog_agc.h | 118 + .../audio_processing/agc/legacy/digital_agc.cc | 704 ++++ .../audio_processing/agc/legacy/digital_agc.h | 75 + .../audio_processing/agc/legacy/gain_control.h | 256 ++ .../audio_processing/agc/legacy_agc_gn/moz.build | 233 ++ .../agc/level_estimation_gn/moz.build | 234 ++ .../audio_processing/agc/loudness_histogram.cc | 229 ++ .../audio_processing/agc/loudness_histogram.h | 90 + .../agc/loudness_histogram_unittest.cc | 107 + .../modules/audio_processing/agc/mock_agc.h | 32 + .../modules/audio_processing/agc/utility.cc | 39 + .../modules/audio_processing/agc/utility.h | 27 + .../modules/audio_processing/agc2/BUILD.gn | 511 +++ .../agc2/adaptive_digital_gain_controller.cc | 216 ++ .../agc2/adaptive_digital_gain_controller.h | 66 + .../adaptive_digital_gain_controller_gn/moz.build | 233 ++ .../adaptive_digital_gain_controller_unittest.cc | 312 ++ .../modules/audio_processing/agc2/agc2_common.h | 62 + .../audio_processing/agc2/agc2_testing_common.cc | 93 + .../audio_processing/agc2/agc2_testing_common.h | 82 + .../agc2/agc2_testing_common_unittest.cc | 27 + .../modules/audio_processing/agc2/biquad_filter.cc | 60 + .../modules/audio_processing/agc2/biquad_filter.h | 56 + .../agc2/biquad_filter_gn/moz.build | 221 ++ .../agc2/biquad_filter_unittest.cc | 175 + .../audio_processing/agc2/clipping_predictor.cc | 384 +++ .../audio_processing/agc2/clipping_predictor.h | 62 + .../agc2/clipping_predictor_gn/moz.build | 233 ++ .../agc2/clipping_predictor_level_buffer.cc | 77 + .../agc2/clipping_predictor_level_buffer.h | 71 + .../clipping_predictor_level_buffer_unittest.cc | 131 + .../agc2/clipping_predictor_unittest.cc | 491 +++ .../audio_processing/agc2/common_gn/moz.build | 201 ++ .../agc2/compute_interpolated_gain_curve.cc | 229 ++ .../agc2/compute_interpolated_gain_curve.h | 48 + .../modules/audio_processing/agc2/cpu_features.cc | 62 + .../modules/audio_processing/agc2/cpu_features.h | 39 + .../agc2/cpu_features_gn/moz.build | 232 ++ .../agc2/fixed_digital_gn/moz.build | 235 ++ .../agc2/fixed_digital_level_estimator.cc | 121 + .../agc2/fixed_digital_level_estimator.h | 66 + .../agc2/fixed_digital_level_estimator_unittest.cc | 159 + .../modules/audio_processing/agc2/gain_applier.cc | 103 + .../modules/audio_processing/agc2/gain_applier.h | 44 + .../agc2/gain_applier_gn/moz.build | 221 ++ 
.../audio_processing/agc2/gain_applier_unittest.cc | 93 + .../audio_processing/agc2/gain_map_gn/moz.build | 201 ++ .../audio_processing/agc2/gain_map_internal.h | 46 + .../agc2/input_volume_controller.cc | 580 ++++ .../agc2/input_volume_controller.h | 282 ++ .../agc2/input_volume_controller_gn/moz.build | 234 ++ .../agc2/input_volume_controller_unittest.cc | 1857 +++++++++++ .../agc2/input_volume_stats_reporter.cc | 171 + .../agc2/input_volume_stats_reporter.h | 96 + .../agc2/input_volume_stats_reporter_gn/moz.build | 225 ++ .../agc2/input_volume_stats_reporter_unittest.cc | 246 ++ .../agc2/interpolated_gain_curve.cc | 204 ++ .../agc2/interpolated_gain_curve.h | 152 + .../agc2/interpolated_gain_curve_unittest.cc | 203 ++ .../modules/audio_processing/agc2/limiter.cc | 155 + .../modules/audio_processing/agc2/limiter.h | 63 + .../audio_processing/agc2/limiter_db_gain_curve.cc | 138 + .../audio_processing/agc2/limiter_db_gain_curve.h | 76 + .../agc2/limiter_db_gain_curve_unittest.cc | 60 + .../audio_processing/agc2/limiter_unittest.cc | 60 + .../audio_processing/agc2/noise_level_estimator.cc | 172 + .../audio_processing/agc2/noise_level_estimator.h | 36 + .../agc2/noise_level_estimator_gn/moz.build | 233 ++ .../agc2/noise_level_estimator_unittest.cc | 98 + .../modules/audio_processing/agc2/rnn_vad/BUILD.gn | 334 ++ .../modules/audio_processing/agc2/rnn_vad/DEPS | 3 + .../agc2/rnn_vad/auto_correlation.cc | 91 + .../agc2/rnn_vad/auto_correlation.h | 49 + .../agc2/rnn_vad/auto_correlation_unittest.cc | 66 + .../modules/audio_processing/agc2/rnn_vad/common.h | 77 + .../agc2/rnn_vad/features_extraction.cc | 90 + .../agc2/rnn_vad/features_extraction.h | 61 + .../agc2/rnn_vad/features_extraction_unittest.cc | 103 + .../audio_processing/agc2/rnn_vad/lp_residual.cc | 141 + .../audio_processing/agc2/rnn_vad/lp_residual.h | 41 + .../agc2/rnn_vad/lp_residual_unittest.cc | 80 + .../audio_processing/agc2/rnn_vad/pitch_search.cc | 70 + .../audio_processing/agc2/rnn_vad/pitch_search.h | 54 + .../agc2/rnn_vad/pitch_search_internal.cc | 513 +++ .../agc2/rnn_vad/pitch_search_internal.h | 114 + .../agc2/rnn_vad/pitch_search_internal_unittest.cc | 217 ++ .../agc2/rnn_vad/pitch_search_unittest.cc | 53 + .../audio_processing/agc2/rnn_vad/ring_buffer.h | 65 + .../agc2/rnn_vad/ring_buffer_unittest.cc | 112 + .../modules/audio_processing/agc2/rnn_vad/rnn.cc | 91 + .../modules/audio_processing/agc2/rnn_vad/rnn.h | 53 + .../audio_processing/agc2/rnn_vad/rnn_fc.cc | 103 + .../modules/audio_processing/agc2/rnn_vad/rnn_fc.h | 72 + .../agc2/rnn_vad/rnn_fc_unittest.cc | 111 + .../audio_processing/agc2/rnn_vad/rnn_gru.cc | 198 ++ .../audio_processing/agc2/rnn_vad/rnn_gru.h | 70 + .../agc2/rnn_vad/rnn_gru_unittest.cc | 186 ++ .../audio_processing/agc2/rnn_vad/rnn_unittest.cc | 70 + .../rnn_vad/rnn_vad_auto_correlation_gn/moz.build | 232 ++ .../agc2/rnn_vad/rnn_vad_common_gn/moz.build | 216 ++ .../agc2/rnn_vad/rnn_vad_gn/moz.build | 233 ++ .../agc2/rnn_vad/rnn_vad_layers_gn/moz.build | 233 ++ .../agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build | 221 ++ .../agc2/rnn_vad/rnn_vad_pitch_gn/moz.build | 233 ++ .../agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build | 205 ++ .../rnn_vad/rnn_vad_sequence_buffer_gn/moz.build | 205 ++ .../rnn_vad/rnn_vad_spectral_features_gn/moz.build | 233 ++ .../rnn_vad_symmetric_matrix_buffer_gn/moz.build | 205 ++ .../audio_processing/agc2/rnn_vad/rnn_vad_tool.cc | 123 + .../agc2/rnn_vad/rnn_vad_unittest.cc | 185 ++ .../agc2/rnn_vad/sequence_buffer.h | 79 + .../agc2/rnn_vad/sequence_buffer_unittest.cc | 102 + 
.../agc2/rnn_vad/spectral_features.cc | 214 ++ .../agc2/rnn_vad/spectral_features.h | 79 + .../agc2/rnn_vad/spectral_features_internal.cc | 188 ++ .../agc2/rnn_vad/spectral_features_internal.h | 100 + .../rnn_vad/spectral_features_internal_unittest.cc | 160 + .../agc2/rnn_vad/spectral_features_unittest.cc | 160 + .../agc2/rnn_vad/symmetric_matrix_buffer.h | 95 + .../rnn_vad/symmetric_matrix_buffer_unittest.cc | 107 + .../audio_processing/agc2/rnn_vad/test_utils.cc | 143 + .../audio_processing/agc2/rnn_vad/test_utils.h | 130 + .../audio_processing/agc2/rnn_vad/vector_math.h | 114 + .../agc2/rnn_vad/vector_math_avx2.cc | 55 + .../agc2/rnn_vad/vector_math_avx2_gn/moz.build | 185 ++ .../agc2/rnn_vad/vector_math_gn/moz.build | 216 ++ .../agc2/rnn_vad/vector_math_unittest.cc | 71 + .../audio_processing/agc2/saturation_protector.cc | 183 ++ .../audio_processing/agc2/saturation_protector.h | 46 + .../agc2/saturation_protector_buffer.cc | 77 + .../agc2/saturation_protector_buffer.h | 59 + .../agc2/saturation_protector_buffer_unittest.cc | 73 + .../agc2/saturation_protector_gn/moz.build | 234 ++ .../agc2/saturation_protector_unittest.cc | 140 + .../agc2/speech_level_estimator.cc | 174 + .../audio_processing/agc2/speech_level_estimator.h | 81 + .../agc2/speech_level_estimator_gn/moz.build | 233 ++ .../agc2/speech_level_estimator_unittest.cc | 207 ++ .../agc2/speech_probability_buffer.cc | 105 + .../agc2/speech_probability_buffer.h | 80 + .../agc2/speech_probability_buffer_unittest.cc | 346 ++ .../modules/audio_processing/agc2/vad_wrapper.cc | 113 + .../modules/audio_processing/agc2/vad_wrapper.h | 82 + .../audio_processing/agc2/vad_wrapper_gn/moz.build | 232 ++ .../audio_processing/agc2/vad_wrapper_unittest.cc | 181 + .../audio_processing/agc2/vector_float_frame.cc | 39 + .../audio_processing/agc2/vector_float_frame.h | 42 + .../modules/audio_processing/api_gn/moz.build | 225 ++ .../audio_processing/apm_logging_gn/moz.build | 233 ++ .../modules/audio_processing/audio_buffer.cc | 396 +++ .../modules/audio_processing/audio_buffer.h | 172 + .../audio_processing/audio_buffer_gn/moz.build | 235 ++ .../audio_processing/audio_buffer_unittest.cc | 93 + .../audio_frame_proxies_gn/moz.build | 225 ++ .../audio_processing/audio_frame_view_gn/moz.build | 205 ++ .../audio_processing/audio_frame_view_unittest.cc | 51 + .../audio_processing_builder_impl.cc | 34 + .../audio_processing/audio_processing_gn/moz.build | 239 ++ .../audio_processing/audio_processing_impl.cc | 2649 +++++++++++++++ .../audio_processing/audio_processing_impl.h | 603 ++++ .../audio_processing_impl_locking_unittest.cc | 1012 ++++++ .../audio_processing_impl_unittest.cc | 1569 +++++++++ .../audio_processing_performance_unittest.cc | 568 ++++ .../audio_processing_statistics_gn/moz.build | 217 ++ .../audio_processing/audio_processing_unittest.cc | 3441 ++++++++++++++++++++ .../capture_levels_adjuster/BUILD.gn | 45 + .../audio_samples_scaler.cc | 92 + .../capture_levels_adjuster/audio_samples_scaler.h | 46 + .../audio_samples_scaler_unittest.cc | 204 ++ .../capture_levels_adjuster.cc | 96 + .../capture_levels_adjuster.h | 88 + .../capture_levels_adjuster_gn/moz.build | 233 ++ .../capture_levels_adjuster_unittest.cc | 187 ++ .../libwebrtc/modules/audio_processing/debug.proto | 115 + .../echo_control_mobile_bit_exact_unittest.cc | 221 ++ .../audio_processing/echo_control_mobile_impl.cc | 287 ++ .../audio_processing/echo_control_mobile_impl.h | 86 + .../echo_control_mobile_unittest.cc | 43 + .../echo_detector/circular_buffer.cc | 49 + 
.../echo_detector/circular_buffer.h | 44 + .../echo_detector/circular_buffer_unittest.cc | 53 + .../echo_detector/mean_variance_estimator.cc | 47 + .../echo_detector/mean_variance_estimator.h | 33 + .../mean_variance_estimator_unittest.cc | 65 + .../audio_processing/echo_detector/moving_max.cc | 52 + .../audio_processing/echo_detector/moving_max.h | 36 + .../echo_detector/moving_max_unittest.cc | 68 + .../normalized_covariance_estimator.cc | 43 + .../normalized_covariance_estimator.h | 43 + .../normalized_covariance_estimator_unittest.cc | 41 + .../g3doc/audio_processing_module.md | 26 + .../modules/audio_processing/gain_control_impl.cc | 373 +++ .../modules/audio_processing/gain_control_impl.h | 91 + .../audio_processing/gain_control_unittest.cc | 393 +++ .../modules/audio_processing/gain_controller2.cc | 283 ++ .../modules/audio_processing/gain_controller2.h | 110 + .../audio_processing/gain_controller2_gn/moz.build | 233 ++ .../audio_processing/gain_controller2_unittest.cc | 615 ++++ .../modules/audio_processing/high_pass_filter.cc | 115 + .../modules/audio_processing/high_pass_filter.h | 45 + .../audio_processing/high_pass_filter_gn/moz.build | 232 ++ .../audio_processing/high_pass_filter_unittest.cc | 301 ++ .../modules/audio_processing/include/aec_dump.cc | 41 + .../modules/audio_processing/include/aec_dump.h | 116 + .../include/audio_frame_proxies.cc | 66 + .../audio_processing/include/audio_frame_proxies.h | 41 + .../audio_processing/include/audio_frame_view.h | 68 + .../audio_processing/include/audio_processing.cc | 210 ++ .../audio_processing/include/audio_processing.h | 941 ++++++ .../include/audio_processing_statistics.cc | 22 + .../include/audio_processing_statistics.h | 67 + .../include/mock_audio_processing.h | 178 + .../audio_processing/logging/apm_data_dumper.cc | 100 + .../audio_processing/logging/apm_data_dumper.h | 452 +++ .../libwebrtc/modules/audio_processing/ns/BUILD.gn | 104 + .../modules/audio_processing/ns/fast_math.cc | 84 + .../modules/audio_processing/ns/fast_math.h | 38 + .../modules/audio_processing/ns/histograms.cc | 47 + .../modules/audio_processing/ns/histograms.h | 55 + .../modules/audio_processing/ns/noise_estimator.cc | 195 ++ .../modules/audio_processing/ns/noise_estimator.h | 77 + .../audio_processing/ns/noise_suppressor.cc | 555 ++++ .../modules/audio_processing/ns/noise_suppressor.h | 92 + .../ns/noise_suppressor_unittest.cc | 102 + .../modules/audio_processing/ns/ns_common.h | 34 + .../modules/audio_processing/ns/ns_config.h | 24 + .../modules/audio_processing/ns/ns_fft.cc | 64 + .../libwebrtc/modules/audio_processing/ns/ns_fft.h | 45 + .../modules/audio_processing/ns/ns_gn/moz.build | 245 ++ .../audio_processing/ns/prior_signal_model.cc | 18 + .../audio_processing/ns/prior_signal_model.h | 32 + .../ns/prior_signal_model_estimator.cc | 170 + .../ns/prior_signal_model_estimator.h | 39 + .../ns/quantile_noise_estimator.cc | 88 + .../audio_processing/ns/quantile_noise_estimator.h | 45 + .../modules/audio_processing/ns/signal_model.cc | 24 + .../modules/audio_processing/ns/signal_model.h | 34 + .../audio_processing/ns/signal_model_estimator.cc | 175 + .../audio_processing/ns/signal_model_estimator.h | 58 + .../ns/speech_probability_estimator.cc | 103 + .../ns/speech_probability_estimator.h | 51 + .../audio_processing/ns/suppression_params.cc | 49 + .../audio_processing/ns/suppression_params.h | 30 + .../modules/audio_processing/ns/wiener_filter.cc | 120 + .../modules/audio_processing/ns/wiener_filter.h | 57 + .../optionally_built_submodule_creators.cc 
| 36 + .../optionally_built_submodule_creators.h | 42 + .../moz.build | 232 ++ .../audio_processing/render_queue_item_verifier.h | 36 + .../audio_processing/residual_echo_detector.cc | 205 ++ .../audio_processing/residual_echo_detector.h | 91 + .../residual_echo_detector_unittest.cc | 138 + .../modules/audio_processing/rms_level.cc | 138 + .../libwebrtc/modules/audio_processing/rms_level.h | 77 + .../audio_processing/rms_level_gn/moz.build | 221 ++ .../modules/audio_processing/rms_level_unittest.cc | 197 ++ .../modules/audio_processing/splitting_filter.cc | 144 + .../modules/audio_processing/splitting_filter.h | 72 + .../audio_processing/splitting_filter_unittest.cc | 103 + .../test/aec_dump_based_simulator.cc | 656 ++++ .../test/aec_dump_based_simulator.h | 82 + .../test/android/apmtest/AndroidManifest.xml | 30 + .../test/android/apmtest/default.properties | 11 + .../test/android/apmtest/jni/main.c | 307 ++ .../test/android/apmtest/res/values/strings.xml | 4 + .../audio_processing/test/api_call_statistics.cc | 95 + .../audio_processing/test/api_call_statistics.h | 47 + .../modules/audio_processing/test/apmtest.m | 365 +++ .../audio_processing/test/audio_buffer_tools.cc | 68 + .../audio_processing/test/audio_buffer_tools.h | 42 + .../test/audio_processing_builder_for_testing.cc | 51 + .../test/audio_processing_builder_for_testing.h | 95 + .../test/audio_processing_simulator.cc | 630 ++++ .../test/audio_processing_simulator.h | 247 ++ .../audio_processing/test/audioproc_float_impl.cc | 821 +++++ .../audio_processing/test/audioproc_float_impl.h | 51 + .../audio_processing/test/bitexactness_tools.cc | 148 + .../audio_processing/test/bitexactness_tools.h | 56 + .../test/conversational_speech/BUILD.gn | 81 + .../test/conversational_speech/OWNERS | 3 + .../test/conversational_speech/README.md | 74 + .../test/conversational_speech/config.cc | 31 + .../test/conversational_speech/config.h | 43 + .../test/conversational_speech/generator.cc | 89 + .../conversational_speech/generator_unittest.cc | 675 ++++ .../test/conversational_speech/mock_wavreader.cc | 34 + .../test/conversational_speech/mock_wavreader.h | 48 + .../mock_wavreader_factory.cc | 66 + .../conversational_speech/mock_wavreader_factory.h | 59 + .../test/conversational_speech/multiend_call.cc | 193 ++ .../test/conversational_speech/multiend_call.h | 104 + .../test/conversational_speech/simulator.cc | 235 ++ .../test/conversational_speech/simulator.h | 44 + .../test/conversational_speech/timing.cc | 73 + .../test/conversational_speech/timing.h | 51 + .../wavreader_abstract_factory.h | 34 + .../conversational_speech/wavreader_factory.cc | 65 + .../test/conversational_speech/wavreader_factory.h | 36 + .../conversational_speech/wavreader_interface.h | 40 + .../audio_processing/test/debug_dump_replayer.cc | 250 ++ .../audio_processing/test/debug_dump_replayer.h | 78 + .../audio_processing/test/debug_dump_test.cc | 504 +++ .../test/echo_canceller_test_tools.cc | 47 + .../test/echo_canceller_test_tools.h | 47 + .../test/echo_canceller_test_tools_unittest.cc | 82 + .../audio_processing/test/echo_control_mock.h | 46 + .../audio_processing/test/fake_recording_device.cc | 190 ++ .../audio_processing/test/fake_recording_device.h | 74 + .../test/fake_recording_device_unittest.cc | 231 ++ .../audio_processing/test/performance_timer.cc | 75 + .../audio_processing/test/performance_timer.h | 47 + .../audio_processing/test/protobuf_utils.cc | 79 + .../modules/audio_processing/test/protobuf_utils.h | 40 + .../test/py_quality_assessment/BUILD.gn | 170 + 
.../test/py_quality_assessment/OWNERS | 5 + .../test/py_quality_assessment/README.md | 125 + .../py_quality_assessment/apm_configs/default.json | 1 + .../apm_quality_assessment.py | 217 ++ .../apm_quality_assessment.sh | 91 + .../apm_quality_assessment_boxplot.py | 154 + .../apm_quality_assessment_export.py | 63 + .../apm_quality_assessment_gencfgs.py | 128 + .../apm_quality_assessment_optimize.py | 189 ++ .../apm_quality_assessment_unittest.py | 28 + .../test/py_quality_assessment/output/README.md | 1 + .../quality_assessment/__init__.py | 7 + .../quality_assessment/annotations.py | 296 ++ .../quality_assessment/annotations_unittest.py | 160 + .../quality_assessment/apm_configs/default.json | 1 + .../quality_assessment/apm_vad.cc | 96 + .../quality_assessment/audioproc_wrapper.py | 100 + .../quality_assessment/collect_data.py | 243 ++ .../quality_assessment/data_access.py | 154 + .../quality_assessment/echo_path_simulation.py | 136 + .../echo_path_simulation_factory.py | 48 + .../echo_path_simulation_unittest.py | 82 + .../quality_assessment/eval_scores.py | 427 +++ .../quality_assessment/eval_scores_factory.py | 55 + .../quality_assessment/eval_scores_unittest.py | 137 + .../quality_assessment/evaluation.py | 57 + .../quality_assessment/exceptions.py | 45 + .../quality_assessment/export.py | 426 +++ .../quality_assessment/export_unittest.py | 86 + .../quality_assessment/external_vad.py | 75 + .../quality_assessment/fake_external_vad.py | 25 + .../quality_assessment/fake_polqa.cc | 56 + .../quality_assessment/input_mixer.py | 97 + .../quality_assessment/input_mixer_unittest.py | 140 + .../quality_assessment/input_signal_creator.py | 68 + .../quality_assessment/results.css | 32 + .../quality_assessment/results.js | 376 +++ .../quality_assessment/signal_processing.py | 359 ++ .../signal_processing_unittest.py | 183 ++ .../quality_assessment/simulation.py | 446 +++ .../quality_assessment/simulation_unittest.py | 203 ++ .../quality_assessment/sound_level.cc | 127 + .../quality_assessment/test_data_generation.py | 526 +++ .../test_data_generation_factory.py | 71 + .../test_data_generation_unittest.py | 207 ++ .../quality_assessment/vad.cc | 103 + .../audio_processing/test/runtime_setting_util.cc | 50 + .../audio_processing/test/runtime_setting_util.h | 23 + .../audio_processing/test/simulator_buffers.cc | 86 + .../audio_processing/test/simulator_buffers.h | 66 + .../modules/audio_processing/test/test_utils.cc | 89 + .../modules/audio_processing/test/test_utils.h | 170 + .../modules/audio_processing/test/unittest.proto | 48 + .../audio_processing/test/wav_based_simulator.cc | 202 ++ .../audio_processing/test/wav_based_simulator.h | 63 + .../audio_processing/three_band_filter_bank.cc | 278 ++ .../audio_processing/three_band_filter_bank.h | 77 + .../modules/audio_processing/transient/BUILD.gn | 133 + .../audio_processing/transient/click_annotate.cc | 107 + .../modules/audio_processing/transient/common.h | 27 + .../transient/daubechies_8_wavelet_coeffs.h | 44 + .../audio_processing/transient/dyadic_decimator.h | 68 + .../transient/dyadic_decimator_unittest.cc | 111 + .../audio_processing/transient/file_utils.cc | 257 ++ .../audio_processing/transient/file_utils.h | 117 + .../transient/file_utils_unittest.cc | 501 +++ .../audio_processing/transient/moving_moments.cc | 50 + .../audio_processing/transient/moving_moments.h | 53 + .../transient/moving_moments_unittest.cc | 207 ++ .../transient/test/plotDetection.m | 22 + .../transient/test/readDetection.m | 26 + 
.../audio_processing/transient/test/readPCM.m | 26 + .../transient/transient_detector.cc | 176 + .../transient/transient_detector.h | 89 + .../transient/transient_detector_unittest.cc | 95 + .../transient/transient_suppression_test.cc | 238 ++ .../transient/transient_suppressor.h | 75 + .../transient_suppressor_api_gn/moz.build | 201 ++ .../transient/transient_suppressor_impl.cc | 455 +++ .../transient/transient_suppressor_impl.h | 115 + .../transient_suppressor_impl_gn/moz.build | 236 ++ .../transient/transient_suppressor_unittest.cc | 175 + .../transient/voice_probability_delay_unit.cc | 56 + .../transient/voice_probability_delay_unit.h | 43 + .../voice_probability_delay_unit_gn/moz.build | 221 ++ .../voice_probability_delay_unit_unittest.cc | 108 + .../audio_processing/transient/windows_private.h | 557 ++++ .../modules/audio_processing/transient/wpd_node.cc | 72 + .../modules/audio_processing/transient/wpd_node.h | 45 + .../transient/wpd_node_unittest.cc | 64 + .../modules/audio_processing/transient/wpd_tree.cc | 118 + .../modules/audio_processing/transient/wpd_tree.h | 92 + .../transient/wpd_tree_unittest.cc | 177 + .../modules/audio_processing/utility/BUILD.gn | 79 + .../modules/audio_processing/utility/DEPS | 3 + .../utility/cascaded_biquad_filter.cc | 126 + .../utility/cascaded_biquad_filter.h | 80 + .../utility/cascaded_biquad_filter_gn/moz.build | 221 ++ .../utility/cascaded_biquad_filter_unittest.cc | 157 + .../audio_processing/utility/delay_estimator.cc | 708 ++++ .../audio_processing/utility/delay_estimator.h | 257 ++ .../utility/delay_estimator_internal.h | 51 + .../utility/delay_estimator_unittest.cc | 621 ++++ .../utility/delay_estimator_wrapper.cc | 489 +++ .../utility/delay_estimator_wrapper.h | 248 ++ .../utility/legacy_delay_estimator_gn/moz.build | 222 ++ .../audio_processing/utility/pffft_wrapper.cc | 135 + .../audio_processing/utility/pffft_wrapper.h | 94 + .../utility/pffft_wrapper_gn/moz.build | 221 ++ .../utility/pffft_wrapper_unittest.cc | 182 ++ .../modules/audio_processing/vad/BUILD.gn | 69 + .../modules/audio_processing/vad/common.h | 29 + .../libwebrtc/modules/audio_processing/vad/gmm.cc | 61 + .../libwebrtc/modules/audio_processing/vad/gmm.h | 45 + .../modules/audio_processing/vad/gmm_unittest.cc | 65 + .../audio_processing/vad/noise_gmm_tables.h | 82 + .../audio_processing/vad/pitch_based_vad.cc | 120 + .../modules/audio_processing/vad/pitch_based_vad.h | 57 + .../vad/pitch_based_vad_unittest.cc | 75 + .../modules/audio_processing/vad/pitch_internal.cc | 55 + .../modules/audio_processing/vad/pitch_internal.h | 30 + .../vad/pitch_internal_unittest.cc | 54 + .../audio_processing/vad/pole_zero_filter.cc | 107 + .../audio_processing/vad/pole_zero_filter.h | 51 + .../vad/pole_zero_filter_unittest.cc | 103 + .../modules/audio_processing/vad/standalone_vad.cc | 91 + .../modules/audio_processing/vad/standalone_vad.h | 69 + .../vad/standalone_vad_unittest.cc | 107 + .../modules/audio_processing/vad/vad_audio_proc.cc | 275 ++ .../modules/audio_processing/vad/vad_audio_proc.h | 90 + .../audio_processing/vad/vad_audio_proc_internal.h | 81 + .../vad/vad_audio_proc_unittest.cc | 62 + .../audio_processing/vad/vad_circular_buffer.cc | 135 + .../audio_processing/vad/vad_circular_buffer.h | 69 + .../vad/vad_circular_buffer_unittest.cc | 134 + .../modules/audio_processing/vad/vad_gn/moz.build | 239 ++ .../vad/voice_activity_detector.cc | 85 + .../audio_processing/vad/voice_activity_detector.h | 74 + .../vad/voice_activity_detector_unittest.cc | 168 + 
.../audio_processing/vad/voice_gmm_tables.h | 77 +
 676 files changed, 118574 insertions(+)
 create mode 100644 third_party/libwebrtc/modules/audio_processing/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/DEPS create mode 100644 third_party/libwebrtc/modules/audio_processing/OWNERS create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_common_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec3_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec_state.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/aec_state_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.h create mode 100644 
third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_framer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_framer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/block_processor_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/config_selector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/config_selector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/decimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/decimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/delay_estimate.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h create mode 100644 
third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_data.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_data_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fft_data_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_unittest.cc 
create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/moving_average.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/moving_average_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/nearend_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc create mode 100644 
third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/subtractor_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.h create mode 100644 
third_party/libwebrtc/modules/audio_processing/aec3/vector_math.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/vector_math_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/vector_math_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_factory.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aec_dump_interface_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/aecm_defines.h create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/gain_control.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h create 
mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/utility.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc/utility.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build create 
mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_map_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/gain_map_internal.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc create mode 100644 
third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc create mode 100644 
third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build create mode 100644 
third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h create mode 100644 third_party/libwebrtc/modules/audio_processing/api_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/apm_logging_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_buffer_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_frame_proxies_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_frame_view_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_frame_view_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_impl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_impl_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_performance_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/audio_processing_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/debug.proto create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_control_mobile_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.cc create mode 100644 
third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/g3doc/audio_processing_module.md create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_control_impl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_control_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_controller2.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_controller2.h create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_controller2_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/gain_controller2_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/high_pass_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/high_pass_filter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/high_pass_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/include/aec_dump.h create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.h create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_frame_view.h create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_processing.h create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/include/mock_audio_processing.h create mode 100644 third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc create mode 
100644 third_party/libwebrtc/modules/audio_processing/ns/fast_math.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/histograms.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/histograms.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_config.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_fft.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/ns_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/signal_model.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/suppression_params.h create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.h create mode 100644 third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/render_queue_item_verifier.h create mode 100644 third_party/libwebrtc/modules/audio_processing/residual_echo_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/residual_echo_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/residual_echo_detector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/rms_level.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/rms_level.h create mode 100644 third_party/libwebrtc/modules/audio_processing/rms_level_gn/moz.build create mode 100644 
third_party/libwebrtc/modules/audio_processing/rms_level_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/splitting_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/splitting_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/splitting_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml create mode 100644 third_party/libwebrtc/modules/audio_processing/test/android/apmtest/default.properties create mode 100644 third_party/libwebrtc/modules/audio_processing/test/android/apmtest/jni/main.c create mode 100644 third_party/libwebrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml create mode 100644 third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/apmtest.m create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc create mode 100644 
third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/debug_dump_test.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/echo_control_mock.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/fake_recording_device_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/performance_timer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/performance_timer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/OWNERS create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/README.md create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py create mode 100644 
third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/output/README.md create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py create mode 100755 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css create mode 100644 
third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/sound_level.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py create mode 100644 third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/vad.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/test_utils.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/test_utils.h create mode 100644 third_party/libwebrtc/modules/audio_processing/test/unittest.proto create mode 100644 third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/click_annotate.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/file_utils.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/file_utils.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/file_utils_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/moving_moments.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/moving_moments_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/test/plotDetection.m create mode 
100644 third_party/libwebrtc/modules/audio_processing/transient/test/readDetection.m create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/test/readPCM.m create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_detector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppression_test.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_api_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/windows_private.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_node.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_node_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.h create mode 100644 third_party/libwebrtc/modules/audio_processing/transient/wpd_tree_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/DEPS create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.h create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_internal.h create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.h create mode 100644 
third_party/libwebrtc/modules/audio_processing/utility/legacy_delay_estimator_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.h create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/BUILD.gn create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/common.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/gmm.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/gmm.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/gmm_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/noise_gmm_tables.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pitch_internal_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/standalone_vad_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_internal.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/vad_gn/moz.build create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.h create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc create mode 100644 third_party/libwebrtc/modules/audio_processing/vad/voice_gmm_tables.h
diff --git a/third_party/libwebrtc/modules/audio_processing/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/BUILD.gn
new file mode 100644
index 0000000000..64e83a006b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/BUILD.gn
@@ -0,0 +1,677 @@
+# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../webrtc.gni")
+if (rtc_enable_protobuf) {
+  import("//third_party/protobuf/proto_library.gni")
+}
+
+config("apm_debug_dump") {
+  if (apm_debug_dump) {
+    defines = [ "WEBRTC_APM_DEBUG_DUMP=1" ]
+  } else {
+    defines = [ "WEBRTC_APM_DEBUG_DUMP=0" ]
+  }
+}
+
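The apm_debug_dump config above is the switch that the rest of this file uses to stamp WEBRTC_APM_DEBUG_DUMP into each target. As a minimal sketch of how a target opts in (the target name here is hypothetical, not part of the patch):

  rtc_library("my_apm_component") {   # hypothetical target
    configs += [ ":apm_debug_dump" ]  # adds WEBRTC_APM_DEBUG_DUMP=1 or =0 to this target
    sources = [ "my_apm_component.cc" ]
  }

The same configs += [ ":apm_debug_dump" ] line appears verbatim on the audio_buffer, gain_controller2 and audio_processing targets below.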
+rtc_library("api") {
+  visibility = [ "*" ]
+  sources = [
+    "include/audio_processing.cc",
+    "include/audio_processing.h",
+  ]
+  deps = [
+    ":audio_frame_view",
+    ":audio_processing_statistics",
+    "../../api:array_view",
+    "../../api:scoped_refptr",
+    "../../api/audio:aec3_config",
+    "../../api/audio:audio_frame_api",
+    "../../api/audio:echo_control",
+    "../../rtc_base:macromagic",
+    "../../rtc_base:refcount",
+    "../../rtc_base:stringutils",
+    "../../rtc_base/system:arch",
+    "../../rtc_base/system:file_wrapper",
+    "../../rtc_base/system:rtc_export",
+    "agc:gain_control_interface",
+  ]
+  absl_deps = [
+    "//third_party/abseil-cpp/absl/strings",
+    "//third_party/abseil-cpp/absl/types:optional",
+  ]
+}
+
+rtc_library("audio_frame_proxies") {
+  visibility = [ "*" ]
+  sources = [
+    "include/audio_frame_proxies.cc",
+    "include/audio_frame_proxies.h",
+  ]
+  deps = [
+    ":api",
+    ":audio_frame_view",
+    "../../api/audio:audio_frame_api",
+  ]
+}
+
+rtc_library("audio_buffer") {
+  visibility = [ "*" ]
+
+  configs += [ ":apm_debug_dump" ]
+
+  sources = [
+    "audio_buffer.cc",
+    "audio_buffer.h",
+    "splitting_filter.cc",
+    "splitting_filter.h",
+    "three_band_filter_bank.cc",
+    "three_band_filter_bank.h",
+  ]
+
+  defines = []
+
+  deps = [
+    ":api",
+    "../../api:array_view",
+    "../../common_audio",
+    "../../common_audio:common_audio_c",
+    "../../rtc_base:checks",
+  ]
+}
+
+rtc_library("high_pass_filter") {
+  visibility = [ "*" ]
+
+  sources = [
+    "high_pass_filter.cc",
+    "high_pass_filter.h",
+  ]
+
+  defines = []
+
+  deps = [
+    ":audio_buffer",
+    "../../api:array_view",
+    "../../rtc_base:checks",
+    "utility:cascaded_biquad_filter",
+  ]
+}
+
+rtc_source_set("aec_dump_interface") {
+  visibility = [ "*" ]
+  sources = [
+    "include/aec_dump.cc",
+    "include/aec_dump.h",
+  ]
+
+  deps = [
+    ":api",
+    ":audio_frame_view",
+  ]
+  absl_deps = [
+    "//third_party/abseil-cpp/absl/base:core_headers",
+    "//third_party/abseil-cpp/absl/types:optional",
+  ]
+}
+
+rtc_library("gain_controller2") {
+  configs += [ ":apm_debug_dump" ]
+  sources = [
+    "gain_controller2.cc",
+    "gain_controller2.h",
+  ]
+  defines = []
+  deps = [
+    ":aec_dump_interface",
+    ":api",
+    ":apm_logging",
+    ":audio_buffer",
+    ":audio_frame_view",
+    "../../common_audio",
+    "../../rtc_base:checks",
+    "../../rtc_base:logging",
+    "../../rtc_base:stringutils",
+    "../../system_wrappers:field_trial",
+    "agc2:adaptive_digital_gain_controller",
+    "agc2:common",
+    "agc2:cpu_features",
+    "agc2:fixed_digital",
+    "agc2:gain_applier",
+    "agc2:input_volume_controller",
+    "agc2:noise_level_estimator",
+    "agc2:saturation_protector",
+    "agc2:speech_level_estimator",
+    "agc2:vad_wrapper",
+  ]
+}
+
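A note on the dependency labels used throughout these targets: GN resolves each label relative to the BUILD.gn that mentions it. An annotated sketch, using labels that appear in the api target above:

  deps = [
    ":audio_frame_view",           # target defined in this BUILD.gn file
    "agc:gain_control_interface",  # target "gain_control_interface" in agc/BUILD.gn
    "../../api:array_view",        # target "array_view" in api/BUILD.gn, two levels up
  ]

absl_deps is WebRTC's separate bucket for Abseil dependencies; keeping them out of deps is the project's convention so that builds embedding WebRTC (for example Chromium) can substitute their own Abseil targets (that rationale is background knowledge, not something this patch states).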
"echo_control_mobile_impl.h", + "gain_control_impl.cc", + "gain_control_impl.h", + "render_queue_item_verifier.h", + ] + + defines = [] + deps = [ + ":aec_dump_interface", + ":api", + ":apm_logging", + ":audio_buffer", + ":audio_frame_proxies", + ":audio_frame_view", + ":audio_processing_statistics", + ":gain_controller2", + ":high_pass_filter", + ":optionally_built_submodule_creators", + ":rms_level", + "../../api:array_view", + "../../api:function_view", + "../../api:make_ref_counted", + "../../api/audio:aec3_config", + "../../api/audio:audio_frame_api", + "../../api/audio:echo_control", + "../../audio/utility:audio_frame_operations", + "../../common_audio:common_audio_c", + "../../common_audio/third_party/ooura:fft_size_256", + "../../rtc_base:checks", + "../../rtc_base:event_tracer", + "../../rtc_base:gtest_prod", + "../../rtc_base:ignore_wundef", + "../../rtc_base:logging", + "../../rtc_base:macromagic", + "../../rtc_base:safe_minmax", + "../../rtc_base:sanitizer", + "../../rtc_base:swap_queue", + "../../rtc_base:timeutils", + "../../rtc_base/experiments:field_trial_parser", + "../../rtc_base/synchronization:mutex", + "../../rtc_base/system:rtc_export", + "../../system_wrappers", + "../../system_wrappers:denormal_disabler", + "../../system_wrappers:field_trial", + "../../system_wrappers:metrics", + "aec3", + "aec_dump:aec_dump", + "aecm:aecm_core", + "agc", + "agc:gain_control_interface", + "agc:legacy_agc", + "agc2:input_volume_stats_reporter", + "capture_levels_adjuster", + "ns", + "transient:transient_suppressor_api", + "vad", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + deps += [ + "../../common_audio", + "../../common_audio:fir_filter", + "../../common_audio:fir_filter_factory", + "../../system_wrappers", + ] + + if (rtc_enable_protobuf) { + deps += [ "aec_dump:aec_dump_impl" ] + } else { + deps += [ "aec_dump:null_aec_dump_factory" ] + } +} + +rtc_library("residual_echo_detector") { + poisonous = [ "default_echo_detector" ] + configs += [ ":apm_debug_dump" ] + sources = [ + "echo_detector/circular_buffer.cc", + "echo_detector/circular_buffer.h", + "echo_detector/mean_variance_estimator.cc", + "echo_detector/mean_variance_estimator.h", + "echo_detector/moving_max.cc", + "echo_detector/moving_max.h", + "echo_detector/normalized_covariance_estimator.cc", + "echo_detector/normalized_covariance_estimator.h", + "residual_echo_detector.cc", + "residual_echo_detector.h", + ] + deps = [ + ":api", + ":apm_logging", + "../../api:array_view", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../system_wrappers:metrics", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("optionally_built_submodule_creators") { + sources = [ + "optionally_built_submodule_creators.cc", + "optionally_built_submodule_creators.h", + ] + deps = [ + "transient:transient_suppressor_api", + "transient:transient_suppressor_impl", + ] +} + +rtc_source_set("rms_level") { + visibility = [ "*" ] + sources = [ + "rms_level.cc", + "rms_level.h", + ] + deps = [ + "../../api:array_view", + "../../rtc_base:checks", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("audio_processing_statistics") { + visibility = [ "*" ] + sources = [ + "include/audio_processing_statistics.cc", + "include/audio_processing_statistics.h", + ] + deps = [ "../../rtc_base/system:rtc_export" ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + 
+rtc_source_set("audio_frame_view") { + sources = [ "include/audio_frame_view.h" ] + deps = [ "../../api:array_view" ] +} + +if (rtc_enable_protobuf) { + proto_library("audioproc_debug_proto") { + sources = [ "debug.proto" ] + + proto_out_dir = "modules/audio_processing" + } +} + +rtc_library("apm_logging") { + configs += [ ":apm_debug_dump" ] + sources = [ + "logging/apm_data_dumper.cc", + "logging/apm_data_dumper.h", + ] + deps = [ + "../../api:array_view", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:stringutils", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + defines = [] +} + +if (rtc_include_tests) { + rtc_source_set("mocks") { + testonly = true + sources = [ "include/mock_audio_processing.h" ] + deps = [ + ":aec_dump_interface", + ":api", + ":audio_buffer", + ":audio_processing", + ":audio_processing_statistics", + "../../test:test_support", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } + + if (!build_with_chromium) { + group("audio_processing_tests") { + testonly = true + deps = [ + ":audioproc_test_utils", + "transient:click_annotate", + "transient:transient_suppression_test", + ] + + if (rtc_enable_protobuf) { + deps += [ + ":audioproc_unittest_proto", + "aec_dump:aec_dump_unittests", + "test/conversational_speech", + "test/py_quality_assessment", + ] + } + } + + rtc_library("audio_processing_unittests") { + testonly = true + + configs += [ ":apm_debug_dump" ] + sources = [ + "audio_buffer_unittest.cc", + "audio_frame_view_unittest.cc", + "echo_control_mobile_unittest.cc", + "gain_controller2_unittest.cc", + "splitting_filter_unittest.cc", + "test/fake_recording_device_unittest.cc", + ] + + deps = [ + ":analog_mic_simulation", + ":api", + ":apm_logging", + ":audio_buffer", + ":audio_frame_view", + ":audio_processing", + ":audioproc_test_utils", + ":gain_controller2", + ":high_pass_filter", + ":mocks", + "../../api:array_view", + "../../api:make_ref_counted", + "../../api:scoped_refptr", + "../../api/audio:aec3_config", + "../../api/audio:aec3_factory", + "../../api/audio:echo_detector_creator", + "../../common_audio", + "../../common_audio:common_audio_c", + "../../rtc_base:checks", + "../../rtc_base:gtest_prod", + "../../rtc_base:ignore_wundef", + "../../rtc_base:macromagic", + "../../rtc_base:platform_thread", + "../../rtc_base:protobuf_utils", + "../../rtc_base:random", + "../../rtc_base:rtc_base_tests_utils", + "../../rtc_base:rtc_event", + "../../rtc_base:safe_conversions", + "../../rtc_base:safe_minmax", + "../../rtc_base:stringutils", + "../../rtc_base:swap_queue", + "../../rtc_base:task_queue_for_test", + "../../rtc_base:threading", + "../../rtc_base/synchronization:mutex", + "../../rtc_base/system:arch", + "../../rtc_base/system:file_wrapper", + "../../system_wrappers", + "../../system_wrappers:denormal_disabler", + "../../test:field_trial", + "../../test:fileutils", + "../../test:rtc_expect_death", + "../../test:test_support", + "../audio_coding:neteq_input_audio_tools", + "aec_dump:mock_aec_dump_unittests", + "agc:agc_unittests", + "agc2:adaptive_digital_gain_controller_unittest", + "agc2:biquad_filter_unittests", + "agc2:fixed_digital_unittests", + "agc2:gain_applier_unittest", + "agc2:input_volume_controller_unittests", + "agc2:input_volume_stats_reporter_unittests", + "agc2:noise_estimator_unittests", + "agc2:saturation_protector_unittest", + "agc2:speech_level_estimator_unittest", + "agc2:test_utils", + "agc2:vad_wrapper_unittests", + 
"agc2/rnn_vad:unittests", + "capture_levels_adjuster", + "capture_levels_adjuster:capture_levels_adjuster_unittests", + "test/conversational_speech:unittest", + "transient:transient_suppression_unittests", + "utility:legacy_delay_estimator_unittest", + "utility:pffft_wrapper_unittest", + "vad:vad_unittests", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + defines = [] + + if (rtc_prefer_fixed_point) { + defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ] + } else { + defines += [ "WEBRTC_AUDIOPROC_FLOAT_PROFILE" ] + } + + if (rtc_enable_protobuf) { + defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ] + deps += [ + ":audioproc_debug_proto", + ":audioproc_protobuf_utils", + ":audioproc_test_utils", + ":audioproc_unittest_proto", + ":optionally_built_submodule_creators", + ":residual_echo_detector", + ":rms_level", + ":runtime_settings_protobuf_utils", + "../../api/audio:audio_frame_api", + "../../api/audio:echo_control", + "../../rtc_base:rtc_base_tests_utils", + "../../rtc_base:rtc_task_queue", + "aec_dump", + "aec_dump:aec_dump_unittests", + ] + absl_deps += [ "//third_party/abseil-cpp/absl/flags:flag" ] + sources += [ + "audio_processing_impl_locking_unittest.cc", + "audio_processing_impl_unittest.cc", + "audio_processing_unittest.cc", + "echo_control_mobile_bit_exact_unittest.cc", + "echo_detector/circular_buffer_unittest.cc", + "echo_detector/mean_variance_estimator_unittest.cc", + "echo_detector/moving_max_unittest.cc", + "echo_detector/normalized_covariance_estimator_unittest.cc", + "gain_control_unittest.cc", + "high_pass_filter_unittest.cc", + "residual_echo_detector_unittest.cc", + "rms_level_unittest.cc", + "test/debug_dump_replayer.cc", + "test/debug_dump_replayer.h", + "test/debug_dump_test.cc", + "test/echo_canceller_test_tools.cc", + "test/echo_canceller_test_tools.h", + "test/echo_canceller_test_tools_unittest.cc", + "test/echo_control_mock.h", + "test/test_utils.h", + ] + } + } + } + + rtc_library("audio_processing_perf_tests") { + testonly = true + configs += [ ":apm_debug_dump" ] + + sources = [ "audio_processing_performance_unittest.cc" ] + deps = [ + ":audio_processing", + ":audioproc_test_utils", + "../../api:array_view", + "../../api/numerics", + "../../api/test/metrics:global_metrics_logger_and_exporter", + "../../api/test/metrics:metric", + "../../rtc_base:platform_thread", + "../../rtc_base:protobuf_utils", + "../../rtc_base:random", + "../../rtc_base:rtc_event", + "../../rtc_base:safe_conversions", + "../../system_wrappers", + "../../test:test_support", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } + + rtc_library("analog_mic_simulation") { + sources = [ + "test/fake_recording_device.cc", + "test/fake_recording_device.h", + ] + deps = [ + "../../api:array_view", + "../../api/audio:audio_frame_api", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:safe_conversions", + "../../rtc_base:safe_minmax", + "agc2:gain_map", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + } + + if (rtc_enable_protobuf && !build_with_chromium) { + rtc_library("audioproc_f_impl") { + testonly = true + configs += [ ":apm_debug_dump" ] + sources = [ + "test/aec_dump_based_simulator.cc", + "test/aec_dump_based_simulator.h", + "test/api_call_statistics.cc", + "test/api_call_statistics.h", + "test/audio_processing_simulator.cc", + "test/audio_processing_simulator.h", + "test/audioproc_float_impl.cc", + 
"test/audioproc_float_impl.h", + "test/wav_based_simulator.cc", + "test/wav_based_simulator.h", + ] + + deps = [ + ":analog_mic_simulation", + ":api", + ":apm_logging", + ":audio_processing", + ":audioproc_debug_proto", + ":audioproc_protobuf_utils", + ":audioproc_test_utils", + ":runtime_settings_protobuf_utils", + "../../api/audio:aec3_config_json", + "../../api/audio:aec3_factory", + "../../api/audio:echo_detector_creator", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:ignore_wundef", + "../../rtc_base:logging", + "../../rtc_base:protobuf_utils", + "../../rtc_base:rtc_json", + "../../rtc_base:safe_conversions", + "../../rtc_base:stringutils", + "../../rtc_base:task_queue_for_test", + "../../rtc_base:timeutils", + "../../rtc_base/system:file_wrapper", + "../../system_wrappers", + "../../system_wrappers:field_trial", + "../../test:test_support", + "aec_dump", + "aec_dump:aec_dump_impl", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + } # audioproc_f_impl + } + + if (rtc_enable_protobuf) { + proto_library("audioproc_unittest_proto") { + sources = [ "test/unittest.proto" ] + proto_out_dir = "modules/audio_processing/test" + } + + rtc_library("audioproc_protobuf_utils") { + sources = [ + "test/protobuf_utils.cc", + "test/protobuf_utils.h", + ] + + deps = [ + ":audioproc_debug_proto", + "../../rtc_base:checks", + "../../rtc_base:ignore_wundef", + "../../rtc_base:protobuf_utils", + "../../rtc_base/system:arch", + ] + } + + rtc_library("runtime_settings_protobuf_utils") { + testonly = true + sources = [ + "test/runtime_setting_util.cc", + "test/runtime_setting_util.h", + ] + + deps = [ + ":api", + ":audioproc_debug_proto", + ":audioproc_protobuf_utils", + "../../rtc_base:checks", + ] + } + } +} + +rtc_library("audioproc_test_utils") { + visibility = [ "*" ] + testonly = true + sources = [ + "test/audio_buffer_tools.cc", + "test/audio_buffer_tools.h", + "test/audio_processing_builder_for_testing.cc", + "test/audio_processing_builder_for_testing.h", + "test/bitexactness_tools.cc", + "test/bitexactness_tools.h", + "test/performance_timer.cc", + "test/performance_timer.h", + "test/simulator_buffers.cc", + "test/simulator_buffers.h", + "test/test_utils.cc", + "test/test_utils.h", + ] + + configs += [ ":apm_debug_dump" ] + + deps = [ + ":api", + ":audio_buffer", + ":audio_processing", + "../../api:array_view", + "../../api/audio:audio_frame_api", + "../../common_audio", + "../../rtc_base:checks", + "../../rtc_base:random", + "../../rtc_base/system:arch", + "../../system_wrappers", + "../../test:fileutils", + "../../test:test_support", + "../audio_coding:neteq_input_audio_tools", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} diff --git a/third_party/libwebrtc/modules/audio_processing/DEPS b/third_party/libwebrtc/modules/audio_processing/DEPS new file mode 100644 index 0000000000..79fd071785 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/DEPS @@ -0,0 +1,14 @@ +include_rules = [ + "+audio/utility/audio_frame_operations.h", + "+common_audio", + "+system_wrappers", +] + +specific_include_rules = { + ".*test\.cc": [ + "+rtc_tools", + # Android platform build has different paths. 
+ "+gtest", + "+external/webrtc", + ], +} diff --git a/third_party/libwebrtc/modules/audio_processing/OWNERS b/third_party/libwebrtc/modules/audio_processing/OWNERS new file mode 100644 index 0000000000..f5dc59ea35 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/OWNERS @@ -0,0 +1,8 @@ +alessiob@webrtc.org +devicentepena@webrtc.org +gustaf@webrtc.org +henrik.lundin@webrtc.org +ivoc@webrtc.org +lionelk@webrtc.org +peah@webrtc.org +saza@webrtc.org diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn new file mode 100644 index 0000000000..c29b893b7d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/BUILD.gn @@ -0,0 +1,384 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") + +rtc_library("aec3") { + visibility = [ "*" ] + configs += [ "..:apm_debug_dump" ] + sources = [ + "adaptive_fir_filter.cc", + "adaptive_fir_filter_erl.cc", + "aec3_common.cc", + "aec3_fft.cc", + "aec_state.cc", + "aec_state.h", + "alignment_mixer.cc", + "alignment_mixer.h", + "api_call_jitter_metrics.cc", + "api_call_jitter_metrics.h", + "block.h", + "block_buffer.cc", + "block_delay_buffer.cc", + "block_delay_buffer.h", + "block_framer.cc", + "block_framer.h", + "block_processor.cc", + "block_processor.h", + "block_processor_metrics.cc", + "block_processor_metrics.h", + "clockdrift_detector.cc", + "clockdrift_detector.h", + "coarse_filter_update_gain.cc", + "coarse_filter_update_gain.h", + "comfort_noise_generator.cc", + "comfort_noise_generator.h", + "config_selector.cc", + "config_selector.h", + "decimator.cc", + "decimator.h", + "delay_estimate.h", + "dominant_nearend_detector.cc", + "dominant_nearend_detector.h", + "downsampled_render_buffer.cc", + "downsampled_render_buffer.h", + "echo_audibility.cc", + "echo_audibility.h", + "echo_canceller3.cc", + "echo_canceller3.h", + "echo_path_delay_estimator.cc", + "echo_path_delay_estimator.h", + "echo_path_variability.cc", + "echo_path_variability.h", + "echo_remover.cc", + "echo_remover.h", + "echo_remover_metrics.cc", + "echo_remover_metrics.h", + "erl_estimator.cc", + "erl_estimator.h", + "erle_estimator.cc", + "erle_estimator.h", + "fft_buffer.cc", + "filter_analyzer.cc", + "filter_analyzer.h", + "frame_blocker.cc", + "frame_blocker.h", + "fullband_erle_estimator.cc", + "fullband_erle_estimator.h", + "matched_filter.cc", + "matched_filter_lag_aggregator.cc", + "matched_filter_lag_aggregator.h", + "moving_average.cc", + "moving_average.h", + "multi_channel_content_detector.cc", + "multi_channel_content_detector.h", + "nearend_detector.h", + "refined_filter_update_gain.cc", + "refined_filter_update_gain.h", + "render_buffer.cc", + "render_delay_buffer.cc", + "render_delay_buffer.h", + "render_delay_controller.cc", + "render_delay_controller.h", + "render_delay_controller_metrics.cc", + "render_delay_controller_metrics.h", + "render_signal_analyzer.cc", + "render_signal_analyzer.h", + "residual_echo_estimator.cc", + "residual_echo_estimator.h", + "reverb_decay_estimator.cc", + "reverb_decay_estimator.h", + "reverb_frequency_response.cc", + "reverb_frequency_response.h", + 
"reverb_model.cc", + "reverb_model.h", + "reverb_model_estimator.cc", + "reverb_model_estimator.h", + "signal_dependent_erle_estimator.cc", + "signal_dependent_erle_estimator.h", + "spectrum_buffer.cc", + "stationarity_estimator.cc", + "stationarity_estimator.h", + "subband_erle_estimator.cc", + "subband_erle_estimator.h", + "subband_nearend_detector.cc", + "subband_nearend_detector.h", + "subtractor.cc", + "subtractor.h", + "subtractor_output.cc", + "subtractor_output.h", + "subtractor_output_analyzer.cc", + "subtractor_output_analyzer.h", + "suppression_filter.cc", + "suppression_filter.h", + "suppression_gain.cc", + "suppression_gain.h", + "transparent_mode.cc", + "transparent_mode.h", + ] + + defines = [] + if (rtc_build_with_neon && target_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":adaptive_fir_filter", + ":adaptive_fir_filter_erl", + ":aec3_common", + ":aec3_fft", + ":fft_data", + ":matched_filter", + ":render_buffer", + ":vector_math", + "..:apm_logging", + "..:audio_buffer", + "..:high_pass_filter", + "../../../api:array_view", + "../../../api/audio:aec3_config", + "../../../api/audio:echo_control", + "../../../common_audio:common_audio_c", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:macromagic", + "../../../rtc_base:race_checker", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:swap_queue", + "../../../rtc_base/experiments:field_trial_parser", + "../../../rtc_base/system:arch", + "../../../system_wrappers", + "../../../system_wrappers:field_trial", + "../../../system_wrappers:metrics", + "../utility:cascaded_biquad_filter", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] + + if (target_cpu == "x86" || target_cpu == "x64") { + deps += [ ":aec3_avx2" ] + } +} + +rtc_source_set("aec3_common") { + sources = [ "aec3_common.h" ] +} + +rtc_source_set("aec3_fft") { + sources = [ "aec3_fft.h" ] + deps = [ + ":aec3_common", + ":fft_data", + "../../../api:array_view", + "../../../common_audio/third_party/ooura:fft_size_128", + "../../../rtc_base:checks", + "../../../rtc_base/system:arch", + ] +} + +rtc_source_set("render_buffer") { + sources = [ + "block.h", + "block_buffer.h", + "fft_buffer.h", + "render_buffer.h", + "spectrum_buffer.h", + ] + deps = [ + ":aec3_common", + ":fft_data", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base/system:arch", + ] +} + +rtc_source_set("adaptive_fir_filter") { + sources = [ "adaptive_fir_filter.h" ] + deps = [ + ":aec3_common", + ":aec3_fft", + ":fft_data", + ":render_buffer", + "..:apm_logging", + "../../../api:array_view", + "../../../rtc_base/system:arch", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_source_set("adaptive_fir_filter_erl") { + sources = [ "adaptive_fir_filter_erl.h" ] + deps = [ + ":aec3_common", + "../../../api:array_view", + "../../../rtc_base/system:arch", + ] +} + +rtc_source_set("matched_filter") { + sources = [ "matched_filter.h" ] + deps = [ + ":aec3_common", + "../../../api:array_view", + "../../../rtc_base:gtest_prod", + "../../../rtc_base/system:arch", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_source_set("vector_math") { + sources = [ "vector_math.h" ] + deps = [ + ":aec3_common", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base/system:arch", + ] +} + +rtc_source_set("fft_data") { + sources = [ 
"fft_data.h" ] + deps = [ + ":aec3_common", + "../../../api:array_view", + "../../../rtc_base/system:arch", + ] +} + +if (target_cpu == "x86" || target_cpu == "x64") { + rtc_library("aec3_avx2") { + configs += [ "..:apm_debug_dump" ] + sources = [ + "adaptive_fir_filter_avx2.cc", + "adaptive_fir_filter_erl_avx2.cc", + "fft_data_avx2.cc", + "matched_filter_avx2.cc", + "vector_math_avx2.cc", + ] + + cflags = [ + "-mavx", + "-mavx2", + "-mfma", + ] + + deps = [ + ":adaptive_fir_filter", + ":adaptive_fir_filter_erl", + ":fft_data", + ":matched_filter", + ":vector_math", + "../../../api:array_view", + "../../../rtc_base:checks", + ] + } +} + +if (rtc_include_tests) { + rtc_library("aec3_unittests") { + testonly = true + + configs += [ "..:apm_debug_dump" ] + sources = [ + "mock/mock_block_processor.cc", + "mock/mock_block_processor.h", + "mock/mock_echo_remover.cc", + "mock/mock_echo_remover.h", + "mock/mock_render_delay_buffer.cc", + "mock/mock_render_delay_buffer.h", + "mock/mock_render_delay_controller.cc", + "mock/mock_render_delay_controller.h", + ] + + deps = [ + ":adaptive_fir_filter", + ":adaptive_fir_filter_erl", + ":aec3", + ":aec3_common", + ":aec3_fft", + ":fft_data", + ":matched_filter", + ":render_buffer", + ":vector_math", + "..:apm_logging", + "..:audio_buffer", + "..:audio_processing", + "..:high_pass_filter", + "../../../api:array_view", + "../../../api/audio:aec3_config", + "../../../rtc_base:checks", + "../../../rtc_base:macromagic", + "../../../rtc_base:random", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../rtc_base/system:arch", + "../../../system_wrappers", + "../../../system_wrappers:metrics", + "../../../test:field_trial", + "../../../test:test_support", + "../utility:cascaded_biquad_filter", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] + + defines = [] + + if (rtc_enable_protobuf) { + sources += [ + "adaptive_fir_filter_erl_unittest.cc", + "adaptive_fir_filter_unittest.cc", + "aec3_fft_unittest.cc", + "aec_state_unittest.cc", + "alignment_mixer_unittest.cc", + "api_call_jitter_metrics_unittest.cc", + "block_delay_buffer_unittest.cc", + "block_framer_unittest.cc", + "block_processor_metrics_unittest.cc", + "block_processor_unittest.cc", + "clockdrift_detector_unittest.cc", + "coarse_filter_update_gain_unittest.cc", + "comfort_noise_generator_unittest.cc", + "config_selector_unittest.cc", + "decimator_unittest.cc", + "echo_canceller3_unittest.cc", + "echo_path_delay_estimator_unittest.cc", + "echo_path_variability_unittest.cc", + "echo_remover_metrics_unittest.cc", + "echo_remover_unittest.cc", + "erl_estimator_unittest.cc", + "erle_estimator_unittest.cc", + "fft_data_unittest.cc", + "filter_analyzer_unittest.cc", + "frame_blocker_unittest.cc", + "matched_filter_lag_aggregator_unittest.cc", + "matched_filter_unittest.cc", + "moving_average_unittest.cc", + "multi_channel_content_detector_unittest.cc", + "refined_filter_update_gain_unittest.cc", + "render_buffer_unittest.cc", + "render_delay_buffer_unittest.cc", + "render_delay_controller_metrics_unittest.cc", + "render_delay_controller_unittest.cc", + "render_signal_analyzer_unittest.cc", + "residual_echo_estimator_unittest.cc", + "reverb_model_estimator_unittest.cc", + "signal_dependent_erle_estimator_unittest.cc", + "subtractor_unittest.cc", + "suppression_filter_unittest.cc", + "suppression_gain_unittest.cc", + "vector_math_unittest.cc", + ] + } + + if (!build_with_chromium) { + deps += [ "..:audio_processing_unittests" ] + } + } +} diff --git 
a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc
new file mode 100644
index 0000000000..917aa951ee
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc
@@ -0,0 +1,744 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
+
+// Defines WEBRTC_ARCH_X86_FAMILY, used below.
+#include "rtc_base/system/arch.h"
+
+#if defined(WEBRTC_HAS_NEON)
+#include <arm_neon.h>
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <math.h>
+
+#include <algorithm>
+#include <functional>
+
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace aec3 {
+
+// Computes and stores the frequency response of the filter.
+void ComputeFrequencyResponse(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
+  for (auto& H2_ch : *H2) {
+    H2_ch.fill(0.f);
+  }
+
+  const size_t num_render_channels = H[0].size();
+  RTC_DCHECK_EQ(H.size(), H2->capacity());
+  for (size_t p = 0; p < num_partitions; ++p) {
+    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
+        float tmp =
+            H[p][ch].re[j] * H[p][ch].re[j] + H[p][ch].im[j] * H[p][ch].im[j];
+        (*H2)[p][j] = std::max((*H2)[p][j], tmp);
+      }
+    }
+  }
+}
+
+#if defined(WEBRTC_HAS_NEON)
+// Computes and stores the frequency response of the filter.
+void ComputeFrequencyResponse_Neon(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
+  for (auto& H2_ch : *H2) {
+    H2_ch.fill(0.f);
+  }
+
+  const size_t num_render_channels = H[0].size();
+  RTC_DCHECK_EQ(H.size(), H2->capacity());
+  for (size_t p = 0; p < num_partitions; ++p) {
+    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
+    auto& H2_p = (*H2)[p];
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      const FftData& H_p_ch = H[p][ch];
+      for (size_t j = 0; j < kFftLengthBy2; j += 4) {
+        const float32x4_t re = vld1q_f32(&H_p_ch.re[j]);
+        const float32x4_t im = vld1q_f32(&H_p_ch.im[j]);
+        float32x4_t H2_new = vmulq_f32(re, re);
+        H2_new = vmlaq_f32(H2_new, im, im);
+        float32x4_t H2_p_j = vld1q_f32(&H2_p[j]);
+        H2_p_j = vmaxq_f32(H2_p_j, H2_new);
+        vst1q_f32(&H2_p[j], H2_p_j);
+      }
+      float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
+                     H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+      H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
+    }
+  }
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Computes and stores the frequency response of the filter.
+void ComputeFrequencyResponse_Sse2(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
+  for (auto& H2_ch : *H2) {
+    H2_ch.fill(0.f);
+  }
+
+  const size_t num_render_channels = H[0].size();
+  RTC_DCHECK_EQ(H.size(), H2->capacity());
+  // constexpr __mmmask8 kMaxMask = static_cast<__mmmask8>(256u);
+  for (size_t p = 0; p < num_partitions; ++p) {
+    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
+    auto& H2_p = (*H2)[p];
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      const FftData& H_p_ch = H[p][ch];
+      for (size_t j = 0; j < kFftLengthBy2; j += 4) {
+        const __m128 re = _mm_loadu_ps(&H_p_ch.re[j]);
+        const __m128 re2 = _mm_mul_ps(re, re);
+        const __m128 im = _mm_loadu_ps(&H_p_ch.im[j]);
+        const __m128 im2 = _mm_mul_ps(im, im);
+        const __m128 H2_new = _mm_add_ps(re2, im2);
+        __m128 H2_k_j = _mm_loadu_ps(&H2_p[j]);
+        H2_k_j = _mm_max_ps(H2_k_j, H2_new);
+        _mm_storeu_ps(&H2_p[j], H2_k_j);
+      }
+      float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
+                     H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+      H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
+    }
+  }
+}
+#endif
+
+// Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)).
+void AdaptPartitions(const RenderBuffer& render_buffer,
+                     const FftData& G,
+                     size_t num_partitions,
+                     std::vector<std::vector<FftData>>* H) {
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  size_t index = render_buffer.Position();
+  const size_t num_render_channels = render_buffer_data[index].size();
+  for (size_t p = 0; p < num_partitions; ++p) {
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      const FftData& X_p_ch = render_buffer_data[index][ch];
+      FftData& H_p_ch = (*H)[p][ch];
+      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+        H_p_ch.re[k] += X_p_ch.re[k] * G.re[k] + X_p_ch.im[k] * G.im[k];
+        H_p_ch.im[k] += X_p_ch.re[k] * G.im[k] - X_p_ch.im[k] * G.re[k];
+      }
+    }
+    index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
+  }
+}
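+
+// Editor's sketch (not upstream code): the update above, written out for a
+// single bin k of one partition/channel pair. With complex bin values X, G
+// and H, H += conj(X) * G expands to exactly the two accumulations below,
+// which is what the NEON/SSE2/AVX2 paths vectorize.
+#if 0  // Illustration only; deliberately excluded from the build.
+void AdaptSingleBin(const FftData& X, const FftData& G, size_t k, FftData* H) {
+  H->re[k] += X.re[k] * G.re[k] + X.im[k] * G.im[k];  // Re(conj(X) * G)
+  H->im[k] += X.re[k] * G.im[k] - X.im[k] * G.re[k];  // Im(conj(X) * G)
+}
+#endif
+
+#if defined(WEBRTC_HAS_NEON)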
+// Adapts the filter partitions. (Neon variant)
+void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H) {
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
+
+  size_t X_partition = render_buffer.Position();
+  size_t limit = lim1;
+  size_t p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
+          const float32x4_t G_re = vld1q_f32(&G.re[k]);
+          const float32x4_t G_im = vld1q_f32(&G.im[k]);
+          const float32x4_t X_re = vld1q_f32(&X.re[k]);
+          const float32x4_t X_im = vld1q_f32(&X.im[k]);
+          const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
+          const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
+          const float32x4_t a = vmulq_f32(X_re, G_re);
+          const float32x4_t e = vmlaq_f32(a, X_im, G_im);
+          const float32x4_t c = vmulq_f32(X_re, G_im);
+          const float32x4_t f = vmlsq_f32(c, X_im, G_re);
+          const float32x4_t g = vaddq_f32(H_re, e);
+          const float32x4_t h = vaddq_f32(H_im, f);
+          vst1q_f32(&H_p_ch.re[k], g);
+          vst1q_f32(&H_p_ch.im[k], h);
+        }
+      }
+    }
+
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  limit = lim1;
+  p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
+                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
+        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
+                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
+      }
+    }
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Adapts the filter partitions. (SSE2 variant)
+void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H) {
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
+
+  size_t X_partition = render_buffer.Position();
+  size_t limit = lim1;
+  size_t p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
+          const __m128 G_re = _mm_loadu_ps(&G.re[k]);
+          const __m128 G_im = _mm_loadu_ps(&G.im[k]);
+          const __m128 X_re = _mm_loadu_ps(&X.re[k]);
+          const __m128 X_im = _mm_loadu_ps(&X.im[k]);
+          const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
+          const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
+          const __m128 a = _mm_mul_ps(X_re, G_re);
+          const __m128 b = _mm_mul_ps(X_im, G_im);
+          const __m128 c = _mm_mul_ps(X_re, G_im);
+          const __m128 d = _mm_mul_ps(X_im, G_re);
+          const __m128 e = _mm_add_ps(a, b);
+          const __m128 f = _mm_sub_ps(c, d);
+          const __m128 g = _mm_add_ps(H_re, e);
+          const __m128 h = _mm_add_ps(H_im, f);
+          _mm_storeu_ps(&H_p_ch.re[k], g);
+          _mm_storeu_ps(&H_p_ch.im[k], h);
+        }
+      }
+    }
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  limit = lim1;
+  p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
+                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
+        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
+                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
+      }
+    }
+
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+}
+#endif
+
+// Produces the filter output.
+void ApplyFilter(const RenderBuffer& render_buffer,
+                 size_t num_partitions,
+                 const std::vector<std::vector<FftData>>& H,
+                 FftData* S) {
+  S->re.fill(0.f);
+  S->im.fill(0.f);
+
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  size_t index = render_buffer.Position();
+  const size_t num_render_channels = render_buffer_data[index].size();
+  for (size_t p = 0; p < num_partitions; ++p) {
+    RTC_DCHECK_EQ(num_render_channels, H[p].size());
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      const FftData& X_p_ch = render_buffer_data[index][ch];
+      const FftData& H_p_ch = H[p][ch];
+      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+        S->re[k] += X_p_ch.re[k] * H_p_ch.re[k] - X_p_ch.im[k] * H_p_ch.im[k];
+        S->im[k] += X_p_ch.re[k] * H_p_ch.im[k] + X_p_ch.im[k] * H_p_ch.re[k];
+      }
+    }
+    index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
+  }
+}
+
+#if defined(WEBRTC_HAS_NEON)
+// Produces the filter output (Neon variant).
+void ApplyFilter_Neon(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S) {
+  // const RenderBuffer& render_buffer,
+  //                       rtc::ArrayView<const FftData> H,
+  //                       FftData* S) {
+  RTC_DCHECK_GE(H.size(), H.size() - 1);
+  S->Clear();
+
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
+
+  size_t X_partition = render_buffer.Position();
+  size_t p = 0;
+  size_t limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
+          const float32x4_t X_re = vld1q_f32(&X.re[k]);
+          const float32x4_t X_im = vld1q_f32(&X.im[k]);
+          const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
+          const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
+          const float32x4_t S_re = vld1q_f32(&S->re[k]);
+          const float32x4_t S_im = vld1q_f32(&S->im[k]);
+          const float32x4_t a = vmulq_f32(X_re, H_re);
+          const float32x4_t e = vmlsq_f32(a, X_im, H_im);
+          const float32x4_t c = vmulq_f32(X_re, H_im);
+          const float32x4_t f = vmlaq_f32(c, X_im, H_re);
+          const float32x4_t g = vaddq_f32(S_re, e);
+          const float32x4_t h = vaddq_f32(S_im, f);
+          vst1q_f32(&S->re[k], g);
+          vst1q_f32(&S->im[k], h);
+        }
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  p = 0;
+  limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
+                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
+                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Produces the filter output (SSE2 variant).
+void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S) {
+  // const RenderBuffer& render_buffer,
+  //                       rtc::ArrayView<const FftData> H,
+  //                       FftData* S) {
+  RTC_DCHECK_GE(H.size(), H.size() - 1);
+  S->re.fill(0.f);
+  S->im.fill(0.f);
+
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
+
+  size_t X_partition = render_buffer.Position();
+  size_t p = 0;
+  size_t limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
+          const __m128 X_re = _mm_loadu_ps(&X.re[k]);
+          const __m128 X_im = _mm_loadu_ps(&X.im[k]);
+          const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
+          const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
+          const __m128 S_re = _mm_loadu_ps(&S->re[k]);
+          const __m128 S_im = _mm_loadu_ps(&S->im[k]);
+          const __m128 a = _mm_mul_ps(X_re, H_re);
+          const __m128 b = _mm_mul_ps(X_im, H_im);
+          const __m128 c = _mm_mul_ps(X_re, H_im);
+          const __m128 d = _mm_mul_ps(X_im, H_re);
+          const __m128 e = _mm_sub_ps(a, b);
+          const __m128 f = _mm_add_ps(c, d);
+          const __m128 g = _mm_add_ps(S_re, e);
+          const __m128 h = _mm_add_ps(S_im, f);
+          _mm_storeu_ps(&S->re[k], g);
+          _mm_storeu_ps(&S->im[k], h);
+        }
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  p = 0;
+  limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
+                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
+                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+}
+#endif
+
+}  // namespace aec3
+
+namespace {
+
+// Ensures that the newly added filter partitions after a size increase are set
+// to zero.
+void ZeroFilter(size_t old_size,
+                size_t new_size,
+                std::vector<std::vector<FftData>>* H) {
+  RTC_DCHECK_GE(H->size(), old_size);
+  RTC_DCHECK_GE(H->size(), new_size);
+
+  for (size_t p = old_size; p < new_size; ++p) {
+    RTC_DCHECK_EQ((*H)[p].size(), (*H)[0].size());
+    for (size_t ch = 0; ch < (*H)[0].size(); ++ch) {
+      (*H)[p][ch].Clear();
+    }
+  }
+}
+
+}  // namespace
+
+AdaptiveFirFilter::AdaptiveFirFilter(size_t max_size_partitions,
+                                     size_t initial_size_partitions,
+                                     size_t size_change_duration_blocks,
+                                     size_t num_render_channels,
+                                     Aec3Optimization optimization,
+                                     ApmDataDumper* data_dumper)
+    : data_dumper_(data_dumper),
+      fft_(),
+      optimization_(optimization),
+      num_render_channels_(num_render_channels),
+      max_size_partitions_(max_size_partitions),
+      size_change_duration_blocks_(
+          static_cast<int>(size_change_duration_blocks)),
+      current_size_partitions_(initial_size_partitions),
+      target_size_partitions_(initial_size_partitions),
+      old_target_size_partitions_(initial_size_partitions),
+      H_(max_size_partitions_, std::vector<FftData>(num_render_channels_)) {
+  RTC_DCHECK(data_dumper_);
+  RTC_DCHECK_GE(max_size_partitions, initial_size_partitions);
+
+  RTC_DCHECK_LT(0, size_change_duration_blocks_);
+  one_by_size_change_duration_blocks_ = 1.f / size_change_duration_blocks_;
+
+  ZeroFilter(0, max_size_partitions_, &H_);
+
+  SetSizePartitions(current_size_partitions_, true);
+}
+
+AdaptiveFirFilter::~AdaptiveFirFilter() = default;
+
+void AdaptiveFirFilter::HandleEchoPathChange() {
+  // TODO(peah): Check the value and purpose of the code below.
+  ZeroFilter(current_size_partitions_, max_size_partitions_, &H_);
+}
+
+void AdaptiveFirFilter::SetSizePartitions(size_t size, bool immediate_effect) {
+  RTC_DCHECK_EQ(max_size_partitions_, H_.capacity());
+  RTC_DCHECK_LE(size, max_size_partitions_);
+
+  target_size_partitions_ = std::min(max_size_partitions_, size);
+  if (immediate_effect) {
+    size_t old_size_partitions_ = current_size_partitions_;
+    current_size_partitions_ = old_target_size_partitions_ =
+        target_size_partitions_;
+    ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
+
+    partition_to_constrain_ =
+        std::min(partition_to_constrain_, current_size_partitions_ - 1);
+    size_change_counter_ = 0;
+  } else {
+    size_change_counter_ = size_change_duration_blocks_;
+  }
+}
+
+void AdaptiveFirFilter::UpdateSize() {
+  RTC_DCHECK_GE(size_change_duration_blocks_, size_change_counter_);
+  size_t old_size_partitions_ = current_size_partitions_;
+  if (size_change_counter_ > 0) {
+    --size_change_counter_;
+
+    auto average = [](float from, float to, float from_weight) {
+      return from * from_weight + to * (1.f - from_weight);
+    };
+
+    float change_factor =
+        size_change_counter_ * one_by_size_change_duration_blocks_;
+
+    current_size_partitions_ = average(old_target_size_partitions_,
+                                       target_size_partitions_, change_factor);
+
+    partition_to_constrain_ =
+        std::min(partition_to_constrain_, current_size_partitions_ - 1);
+  } else {
+    current_size_partitions_ = old_target_size_partitions_ =
+        target_size_partitions_;
+  }
+  ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
+  RTC_DCHECK_LE(0, size_change_counter_);
+}
+
+void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer,
+                               FftData* S) const {
+  RTC_DCHECK(S);
+  switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::ApplyFilter_Sse2(render_buffer, current_size_partitions_, H_, S);
+      break;
+    case Aec3Optimization::kAvx2:
+      aec3::ApplyFilter_Avx2(render_buffer, current_size_partitions_, H_, S);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::ApplyFilter_Neon(render_buffer, current_size_partitions_, H_, S);
+      break;
+#endif
+    default:
+      aec3::ApplyFilter(render_buffer, current_size_partitions_, H_, S);
+  }
+}
+
+void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
+                              const FftData& G) {
+  // Adapt the filter and update the filter size.
+  AdaptAndUpdateSize(render_buffer, G);
+
+  // Constrain the filter partitions in a cyclic manner.
+  Constrain();
+}
+
+void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
+                              const FftData& G,
+                              std::vector<float>* impulse_response) {
+  // Adapt the filter and update the filter size.
+  AdaptAndUpdateSize(render_buffer, G);
+
+  // Constrain the filter partitions in a cyclic manner.
+  ConstrainAndUpdateImpulseResponse(impulse_response);
+}
+
+void AdaptiveFirFilter::ComputeFrequencyResponse(
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const {
+  RTC_DCHECK_GE(max_size_partitions_, H2->capacity());
+
+  H2->resize(current_size_partitions_);
+
+  switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::ComputeFrequencyResponse_Sse2(current_size_partitions_, H_, H2);
+      break;
+    case Aec3Optimization::kAvx2:
+      aec3::ComputeFrequencyResponse_Avx2(current_size_partitions_, H_, H2);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::ComputeFrequencyResponse_Neon(current_size_partitions_, H_, H2);
+      break;
+#endif
+    default:
+      aec3::ComputeFrequencyResponse(current_size_partitions_, H_, H2);
+  }
+}
+
+void AdaptiveFirFilter::AdaptAndUpdateSize(const RenderBuffer& render_buffer,
+                                           const FftData& G) {
+  // Update the filter size if needed.
+  UpdateSize();
+
+  // Adapt the filter.
+  switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::AdaptPartitions_Sse2(render_buffer, G, current_size_partitions_,
+                                 &H_);
+      break;
+    case Aec3Optimization::kAvx2:
+      aec3::AdaptPartitions_Avx2(render_buffer, G, current_size_partitions_,
+                                 &H_);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::AdaptPartitions_Neon(render_buffer, G, current_size_partitions_,
+                                 &H_);
+      break;
+#endif
+    default:
+      aec3::AdaptPartitions(render_buffer, G, current_size_partitions_, &H_);
+  }
+}
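+
+// Editor's sketch (not upstream code): each partition spans kFftLengthBy2
+// time-domain taps, so constraining a partition amounts to ifft -> zero the
+// second half -> fft. The invariant this enforces (linear rather than
+// circular convolution in the overlap-save filtering) can be checked as
+// below; the helper name and tolerance are hypothetical.
+#if 0  // Illustration only; deliberately excluded from the build.
+bool PartitionIsConstrained(const Aec3Fft& fft, const FftData& H_p) {
+  std::array<float, kFftLength> h;
+  fft.Ifft(H_p, &h);
+  // The trailing kFftLengthBy2 samples must be (numerically) zero.
+  return std::all_of(h.begin() + kFftLengthBy2, h.end(),
+                     [](float x) { return fabsf(x) < 1e-6f; });
+}
+#endif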
+
+// Constrains a partition of the frequency domain filter to be limited in
+// time via setting the relevant time-domain coefficients to zero and updates
+// the corresponding values in an externally stored impulse response estimate.
+void AdaptiveFirFilter::ConstrainAndUpdateImpulseResponse(
+    std::vector<float>* impulse_response) {
+  RTC_DCHECK_EQ(GetTimeDomainLength(max_size_partitions_),
+                impulse_response->capacity());
+  impulse_response->resize(GetTimeDomainLength(current_size_partitions_));
+  std::array<float, kFftLength> h;
+  std::fill(
+      impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2,
+      impulse_response->begin() + (partition_to_constrain_ + 1) * kFftLengthBy2,
+      0.f);
+
+  for (size_t ch = 0; ch < num_render_channels_; ++ch) {
+    fft_.Ifft(H_[partition_to_constrain_][ch], &h);
+
+    static constexpr float kScale = 1.0f / kFftLengthBy2;
+    std::for_each(h.begin(), h.begin() + kFftLengthBy2,
+                  [](float& a) { a *= kScale; });
+    std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
+
+    if (ch == 0) {
+      std::copy(
+          h.begin(), h.begin() + kFftLengthBy2,
+          impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2);
+    } else {
+      for (size_t k = 0, j = partition_to_constrain_ * kFftLengthBy2;
+           k < kFftLengthBy2; ++k, ++j) {
+        if (fabsf((*impulse_response)[j]) < fabsf(h[k])) {
+          (*impulse_response)[j] = h[k];
+        }
+      }
+    }
+
+    fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
+  }
+
+  partition_to_constrain_ =
+      partition_to_constrain_ < (current_size_partitions_ - 1)
+          ? partition_to_constrain_ + 1
+          : 0;
+}
+
+// Constrains a partition of the frequency domain filter to be limited in
+// time via setting the relevant time-domain coefficients to zero.
+void AdaptiveFirFilter::Constrain() {
+  std::array<float, kFftLength> h;
+  for (size_t ch = 0; ch < num_render_channels_; ++ch) {
+    fft_.Ifft(H_[partition_to_constrain_][ch], &h);
+
+    static constexpr float kScale = 1.0f / kFftLengthBy2;
+    std::for_each(h.begin(), h.begin() + kFftLengthBy2,
+                  [](float& a) { a *= kScale; });
+    std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
+
+    fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
+  }
+
+  partition_to_constrain_ =
+      partition_to_constrain_ < (current_size_partitions_ - 1)
+          ? partition_to_constrain_ + 1
+          : 0;
+}
+
+void AdaptiveFirFilter::ScaleFilter(float factor) {
+  for (auto& H_p : H_) {
+    for (auto& H_p_ch : H_p) {
+      for (auto& re : H_p_ch.re) {
+        re *= factor;
+      }
+      for (auto& im : H_p_ch.im) {
+        im *= factor;
+      }
+    }
+  }
+}
+
+// Set the filter coefficients.
+void AdaptiveFirFilter::SetFilter(size_t num_partitions,
+                                  const std::vector<std::vector<FftData>>& H) {
+  const size_t min_num_partitions =
+      std::min(current_size_partitions_, num_partitions);
+  for (size_t p = 0; p < min_num_partitions; ++p) {
+    RTC_DCHECK_EQ(H_[p].size(), H[p].size());
+    RTC_DCHECK_EQ(num_render_channels_, H_[p].size());
+
+    for (size_t ch = 0; ch < num_render_channels_; ++ch) {
+      std::copy(H[p][ch].re.begin(), H[p][ch].re.end(), H_[p][ch].re.begin());
+      std::copy(H[p][ch].im.begin(), H[p][ch].im.end(), H_[p][ch].im.begin());
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.h b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
new file mode 100644
index 0000000000..34c06f4367
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace aec3 {
+// Computes and stores the frequency response of the filter.
+void ComputeFrequencyResponse(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
+#if defined(WEBRTC_HAS_NEON)
+void ComputeFrequencyResponse_Neon(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void ComputeFrequencyResponse_Sse2(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
+
+void ComputeFrequencyResponse_Avx2(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
+#endif
+
+// Adapts the filter partitions.
+void AdaptPartitions(const RenderBuffer& render_buffer,
+                     const FftData& G,
+                     size_t num_partitions,
+                     std::vector<std::vector<FftData>>* H);
+#if defined(WEBRTC_HAS_NEON)
+void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H);
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H);
+
+void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H);
+#endif
+
+// Produces the filter output.
+void ApplyFilter(const RenderBuffer& render_buffer,
+                 size_t num_partitions,
+                 const std::vector<std::vector<FftData>>& H,
+                 FftData* S);
+#if defined(WEBRTC_HAS_NEON)
+void ApplyFilter_Neon(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S);
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S);
+
+void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S);
+#endif
+
+}  // namespace aec3
+
+// Provides a frequency domain adaptive filter functionality.
+class AdaptiveFirFilter {
+ public:
+  AdaptiveFirFilter(size_t max_size_partitions,
+                    size_t initial_size_partitions,
+                    size_t size_change_duration_blocks,
+                    size_t num_render_channels,
+                    Aec3Optimization optimization,
+                    ApmDataDumper* data_dumper);
+
+  ~AdaptiveFirFilter();
+
+  AdaptiveFirFilter(const AdaptiveFirFilter&) = delete;
+  AdaptiveFirFilter& operator=(const AdaptiveFirFilter&) = delete;
+
+  // Produces the output of the filter.
+  void Filter(const RenderBuffer& render_buffer, FftData* S) const;
+
+  // Adapts the filter and updates an externally stored impulse response
+  // estimate.
+  void Adapt(const RenderBuffer& render_buffer,
+             const FftData& G,
+             std::vector<float>* impulse_response);
+
+  // Adapts the filter.
+  void Adapt(const RenderBuffer& render_buffer, const FftData& G);
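+
+  // Editor's sketch (not upstream documentation): a typical per-block call
+  // sequence for this class, with the error-to-gain computation elided:
+  //   FftData S;
+  //   filter.Filter(render_buffer, &S);  // Echo estimate S = H * X.
+  //   ...                                // Derive the update gain G from S.
+  //   filter.Adapt(render_buffer, G);    // H += conj(X) * G, then constrain.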
+
+  // Receives reports that known echo path changes have occurred and adjusts
+  // the filter adaptation accordingly.
+  void HandleEchoPathChange();
+
+  // Returns the filter size.
+  size_t SizePartitions() const { return current_size_partitions_; }
+
+  // Sets the filter size.
+  void SetSizePartitions(size_t size, bool immediate_effect);
+
+  // Computes the frequency responses for the filter partitions.
+  void ComputeFrequencyResponse(
+      std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const;
+
+  // Returns the maximum number of partitions for the filter.
+  size_t max_filter_size_partitions() const { return max_size_partitions_; }
+
+  void DumpFilter(absl::string_view name_frequency_domain) {
+    for (size_t p = 0; p < max_size_partitions_; ++p) {
+      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].re);
+      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].im);
+    }
+  }
+
+  // Scale the filter impulse response and spectrum by a factor.
+  void ScaleFilter(float factor);
+
+  // Set the filter coefficients.
+  void SetFilter(size_t num_partitions,
+                 const std::vector<std::vector<FftData>>& H);
+
+  // Gets the filter coefficients.
+  const std::vector<std::vector<FftData>>& GetFilter() const { return H_; }
+
+ private:
+  // Adapts the filter and updates the filter size.
+  void AdaptAndUpdateSize(const RenderBuffer& render_buffer, const FftData& G);
+
+  // Constrain the filter partitions in a cyclic manner.
+  void Constrain();
+  // Constrains the filter in a cyclic manner and updates the corresponding
+  // values in the supplied impulse response.
+  void ConstrainAndUpdateImpulseResponse(std::vector<float>* impulse_response);
+
+  // Gradually updates the current filter size towards the target size.
+  void UpdateSize();
+
+  ApmDataDumper* const data_dumper_;
+  const Aec3Fft fft_;
+  const Aec3Optimization optimization_;
+  const size_t num_render_channels_;
+  const size_t max_size_partitions_;
+  const int size_change_duration_blocks_;
+  float one_by_size_change_duration_blocks_;
+  size_t current_size_partitions_;
+  size_t target_size_partitions_;
+  size_t old_target_size_partitions_;
+  int size_change_counter_ = 0;
+  std::vector<std::vector<FftData>> H_;
+  size_t partition_to_constrain_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc
new file mode 100644
index 0000000000..44d4514275
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
+
+#include "common_audio/intrin.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace aec3 {
+
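+// Editor's note (not upstream code): the AVX2 variants below process eight
+// bins per iteration and treat the last (Nyquist) bin as a scalar tail,
+// since the half-spectrum holds kFftLengthBy2Plus1 = 65 coefficients:
+#if 0  // Illustration only; deliberately excluded from the build.
+static_assert(kFftLengthBy2 % 8 == 0, "Vector loops cover bins [0, 64).");
+static_assert(kFftLengthBy2Plus1 == kFftLengthBy2 + 1,
+              "Bin 64 is handled as a scalar tail.");
+#endif
+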
+// Computes and stores the frequency response of the filter.
+void ComputeFrequencyResponse_Avx2(
+    size_t num_partitions,
+    const std::vector<std::vector<FftData>>& H,
+    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
+  for (auto& H2_ch : *H2) {
+    H2_ch.fill(0.f);
+  }
+
+  const size_t num_render_channels = H[0].size();
+  RTC_DCHECK_EQ(H.size(), H2->capacity());
+  for (size_t p = 0; p < num_partitions; ++p) {
+    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
+    auto& H2_p = (*H2)[p];
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      const FftData& H_p_ch = H[p][ch];
+      for (size_t j = 0; j < kFftLengthBy2; j += 8) {
+        __m256 re = _mm256_loadu_ps(&H_p_ch.re[j]);
+        __m256 re2 = _mm256_mul_ps(re, re);
+        __m256 im = _mm256_loadu_ps(&H_p_ch.im[j]);
+        re2 = _mm256_fmadd_ps(im, im, re2);
+        __m256 H2_k_j = _mm256_loadu_ps(&H2_p[j]);
+        H2_k_j = _mm256_max_ps(H2_k_j, re2);
+        _mm256_storeu_ps(&H2_p[j], H2_k_j);
+      }
+      float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
+                     H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+      H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
+    }
+  }
+}
+
+// Adapts the filter partitions.
+void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
+                          const FftData& G,
+                          size_t num_partitions,
+                          std::vector<std::vector<FftData>>* H) {
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumEightBinBands = kFftLengthBy2 / 8;
+
+  size_t X_partition = render_buffer.Position();
+  size_t limit = lim1;
+  size_t p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        for (size_t k = 0, n = 0; n < kNumEightBinBands; ++n, k += 8) {
+          const __m256 G_re = _mm256_loadu_ps(&G.re[k]);
+          const __m256 G_im = _mm256_loadu_ps(&G.im[k]);
+          const __m256 X_re = _mm256_loadu_ps(&X.re[k]);
+          const __m256 X_im = _mm256_loadu_ps(&X.im[k]);
+          const __m256 H_re = _mm256_loadu_ps(&H_p_ch.re[k]);
+          const __m256 H_im = _mm256_loadu_ps(&H_p_ch.im[k]);
+          const __m256 a = _mm256_mul_ps(X_re, G_re);
+          const __m256 b = _mm256_mul_ps(X_im, G_im);
+          const __m256 c = _mm256_mul_ps(X_re, G_im);
+          const __m256 d = _mm256_mul_ps(X_im, G_re);
+          const __m256 e = _mm256_add_ps(a, b);
+          const __m256 f = _mm256_sub_ps(c, d);
+          const __m256 g = _mm256_add_ps(H_re, e);
+          const __m256 h = _mm256_add_ps(H_im, f);
+          _mm256_storeu_ps(&H_p_ch.re[k], g);
+          _mm256_storeu_ps(&H_p_ch.im[k], h);
+        }
+      }
+    }
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  limit = lim1;
+  p = 0;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        FftData& H_p_ch = (*H)[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+
+        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
+                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
+        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
+                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
+      }
+    }
+
+    X_partition = 0;
+    limit = lim2;
+  } while (p < lim2);
+}
+
+// Produces the filter output (AVX2 variant).
+void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
+                      size_t num_partitions,
+                      const std::vector<std::vector<FftData>>& H,
+                      FftData* S) {
+  RTC_DCHECK_GE(H.size(), H.size() - 1);
+  S->re.fill(0.f);
+  S->im.fill(0.f);
+
+  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
+      render_buffer.GetFftBuffer();
+  const size_t num_render_channels = render_buffer_data[0].size();
+  const size_t lim1 = std::min(
+      render_buffer_data.size() - render_buffer.Position(), num_partitions);
+  const size_t lim2 = num_partitions;
+  constexpr size_t kNumEightBinBands = kFftLengthBy2 / 8;
+
+  size_t X_partition = render_buffer.Position();
+  size_t p = 0;
+  size_t limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        for (size_t k = 0, n = 0; n < kNumEightBinBands; ++n, k += 8) {
+          const __m256 X_re = _mm256_loadu_ps(&X.re[k]);
+          const __m256 X_im = _mm256_loadu_ps(&X.im[k]);
+          const __m256 H_re = _mm256_loadu_ps(&H_p_ch.re[k]);
+          const __m256 H_im = _mm256_loadu_ps(&H_p_ch.im[k]);
+          const __m256 S_re = _mm256_loadu_ps(&S->re[k]);
+          const __m256 S_im = _mm256_loadu_ps(&S->im[k]);
+          const __m256 a = _mm256_mul_ps(X_re, H_re);
+          const __m256 b = _mm256_mul_ps(X_im, H_im);
+          const __m256 c = _mm256_mul_ps(X_re, H_im);
+          const __m256 d = _mm256_mul_ps(X_im, H_re);
+          const __m256 e = _mm256_sub_ps(a, b);
+          const __m256 f = _mm256_add_ps(c, d);
+          const __m256 g = _mm256_add_ps(S_re, e);
+          const __m256 h = _mm256_add_ps(S_im, f);
+          _mm256_storeu_ps(&S->re[k], g);
+          _mm256_storeu_ps(&S->im[k], h);
+        }
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+
+  X_partition = render_buffer.Position();
+  p = 0;
+  limit = lim1;
+  do {
+    for (; p < limit; ++p, ++X_partition) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        const FftData& H_p_ch = H[p][ch];
+        const FftData& X = render_buffer_data[X_partition][ch];
+        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
+                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
+        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
+                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
+      }
+    }
+    limit = lim2;
+    X_partition = 0;
+  } while (p < lim2);
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc
new file mode 100644
index 0000000000..45b8813979
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
+
+#include <algorithm>
+#include <functional>
+
+#if defined(WEBRTC_HAS_NEON)
+#include <arm_neon.h>
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+
+namespace webrtc {
+
+namespace aec3 {
+
+// Computes and stores the echo return loss estimate of the filter, which is
+// the sum of the partition frequency responses.
+void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+                 rtc::ArrayView<float> erl) {
+  std::fill(erl.begin(), erl.end(), 0.f);
+  for (auto& H2_j : H2) {
+    std::transform(H2_j.begin(), H2_j.end(), erl.begin(), erl.begin(),
+                   std::plus<float>());
+  }
+}
+
+#if defined(WEBRTC_HAS_NEON)
+// Computes and stores the echo return loss estimate of the filter, which is
+// the sum of the partition frequency responses.
+void ErlComputer_NEON(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl) {
+  std::fill(erl.begin(), erl.end(), 0.f);
+  for (auto& H2_j : H2) {
+    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
+      const float32x4_t H2_j_k = vld1q_f32(&H2_j[k]);
+      float32x4_t erl_k = vld1q_f32(&erl[k]);
+      erl_k = vaddq_f32(erl_k, H2_j_k);
+      vst1q_f32(&erl[k], erl_k);
+    }
+    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
+  }
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Computes and stores the echo return loss estimate of the filter, which is
+// the sum of the partition frequency responses.
+void ErlComputer_SSE2(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl) {
+  std::fill(erl.begin(), erl.end(), 0.f);
+  for (auto& H2_j : H2) {
+    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
+      const __m128 H2_j_k = _mm_loadu_ps(&H2_j[k]);
+      __m128 erl_k = _mm_loadu_ps(&erl[k]);
+      erl_k = _mm_add_ps(erl_k, H2_j_k);
+      _mm_storeu_ps(&erl[k], erl_k);
+    }
+    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
+  }
+}
+#endif
+
+}  // namespace aec3
+
+void ComputeErl(const Aec3Optimization& optimization,
+                const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+                rtc::ArrayView<float> erl) {
+  RTC_DCHECK_EQ(kFftLengthBy2Plus1, erl.size());
+  // Update the frequency response and echo return loss for the filter.
+  switch (optimization) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::ErlComputer_SSE2(H2, erl);
+      break;
+    case Aec3Optimization::kAvx2:
+      aec3::ErlComputer_AVX2(H2, erl);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::ErlComputer_NEON(H2, erl);
+      break;
+#endif
+    default:
+      aec3::ErlComputer(H2, erl);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h
new file mode 100644
index 0000000000..4ac13b1bc3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace aec3 {
+
+// Computes and stores the echo return loss estimate of the filter, which is
+// the sum of the partition frequency responses.
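+// (Editor's sketch, not upstream:) e.g., with two partitions whose squared
+// responses at bin k are 0.5 and 0.25, the estimate at that bin is
+//   erl[k] = H2[0][k] + H2[1][k] = 0.75.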
+void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+                 rtc::ArrayView<float> erl);
+#if defined(WEBRTC_HAS_NEON)
+void ErlComputer_NEON(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl);
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void ErlComputer_SSE2(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl);
+
+void ErlComputer_AVX2(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl);
+#endif
+
+}  // namespace aec3
+
+// Computes the echo return loss based on a frequency response.
+void ComputeErl(const Aec3Optimization& optimization,
+                const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+                rtc::ArrayView<float> erl);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc
new file mode 100644
index 0000000000..5fe7514db1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
+
+#include <immintrin.h>
+
+namespace webrtc {
+
+namespace aec3 {
+
+// Computes and stores the echo return loss estimate of the filter, which is
+// the sum of the partition frequency responses.
+void ErlComputer_AVX2(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
+    rtc::ArrayView<float> erl) {
+  std::fill(erl.begin(), erl.end(), 0.f);
+  for (auto& H2_j : H2) {
+    for (size_t k = 0; k < kFftLengthBy2; k += 8) {
+      const __m256 H2_j_k = _mm256_loadu_ps(&H2_j[k]);
+      __m256 erl_k = _mm256_loadu_ps(&erl[k]);
+      erl_k = _mm256_add_ps(erl_k, H2_j_k);
+      _mm256_storeu_ps(&erl[k], erl_k);
+    }
+    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
+  }
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_gn/moz.build
new file mode 100644
index 0000000000..60ecc93ab9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_gn/moz.build
@@ -0,0 +1,205 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("adaptive_fir_filter_erl_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_unittest.cc new file mode 100644 index 0000000000..d2af70a9f2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_unittest.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h" + +#include +#include + +#include "rtc_base/system/arch.h" +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif + +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" + +namespace webrtc { +namespace aec3 { + +#if defined(WEBRTC_HAS_NEON) +// Verifies that the optimized method for echo return loss computation is +// bitexact to the reference counterpart. 
+TEST(AdaptiveFirFilter, UpdateErlNeonOptimization) { + const size_t kNumPartitions = 12; + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions); + std::array<float, kFftLengthBy2Plus1> erl; + std::array<float, kFftLengthBy2Plus1> erl_NEON; + + for (size_t j = 0; j < H2.size(); ++j) { + for (size_t k = 0; k < H2[j].size(); ++k) { + H2[j][k] = k + j / 3.f; + } + } + + ErlComputer(H2, erl); + ErlComputer_NEON(H2, erl_NEON); + + for (size_t j = 0; j < erl.size(); ++j) { + EXPECT_FLOAT_EQ(erl[j], erl_NEON[j]); + } +} + +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Verifies that the optimized method for echo return loss computation is +// bitexact to the reference counterpart. +TEST(AdaptiveFirFilter, UpdateErlSse2Optimization) { + bool use_sse2 = (GetCPUInfo(kSSE2) != 0); + if (use_sse2) { + const size_t kNumPartitions = 12; + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions); + std::array<float, kFftLengthBy2Plus1> erl; + std::array<float, kFftLengthBy2Plus1> erl_SSE2; + + for (size_t j = 0; j < H2.size(); ++j) { + for (size_t k = 0; k < H2[j].size(); ++k) { + H2[j][k] = k + j / 3.f; + } + } + + ErlComputer(H2, erl); + ErlComputer_SSE2(H2, erl_SSE2); + + for (size_t j = 0; j < erl.size(); ++j) { + EXPECT_FLOAT_EQ(erl[j], erl_SSE2[j]); + } + } +} + +// Verifies that the optimized method for echo return loss computation is +// bitexact to the reference counterpart. +TEST(AdaptiveFirFilter, UpdateErlAvx2Optimization) { + bool use_avx2 = (GetCPUInfo(kAVX2) != 0); + if (use_avx2) { + const size_t kNumPartitions = 12; + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions); + std::array<float, kFftLengthBy2Plus1> erl; + std::array<float, kFftLengthBy2Plus1> erl_AVX2; + + for (size_t j = 0; j < H2.size(); ++j) { + for (size_t k = 0; k < H2[j].size(); ++k) { + H2[j][k] = k + j / 3.f; + } + } + + ErlComputer(H2, erl); + ErlComputer_AVX2(H2, erl_AVX2); + + for (size_t j = 0; j < erl.size(); ++j) { + EXPECT_FLOAT_EQ(erl[j], erl_AVX2[j]); + } + } +} + +#endif + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_gn/moz.build new file mode 100644 index 0000000000..fd78a43560 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_gn/moz.build @@ -0,0 +1,216 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("adaptive_fir_filter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc new file mode 100644 index 0000000000..a13764c109 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. 
+#include <math.h> + +#include <algorithm> +#include <numeric> +#include <string> + +#include "rtc_base/system/arch.h" +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include <emmintrin.h> +#endif + +#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/coarse_filter_update_gain.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "modules/audio_processing/utility/cascaded_biquad_filter.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" + +namespace webrtc { +namespace aec3 { +namespace { + +std::string ProduceDebugText(size_t num_render_channels, size_t delay) { + rtc::StringBuilder ss; + ss << "delay: " << delay << ", "; + ss << "num_render_channels:" << num_render_channels; + return ss.Release(); +} + +} // namespace + +class AdaptiveFirFilterOneTwoFourEightRenderChannels + : public ::testing::Test, + public ::testing::WithParamInterface<size_t> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + AdaptiveFirFilterOneTwoFourEightRenderChannels, + ::testing::Values(1, 2, 4, 8)); + +#if defined(WEBRTC_HAS_NEON) +// Verifies that the optimized methods for filter adaptation are similar to +// their reference counterparts. +TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels, + FilterAdaptationNeonOptimizations) { + const size_t num_render_channels = GetParam(); + for (size_t num_partitions : {2, 5, 12, 30, 50}) { + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz, + num_render_channels)); + Random random_generator(42U); + Block x(kNumBands, num_render_channels); + FftData S_C; + FftData S_Neon; + FftData G; + Aec3Fft fft; + std::vector<std::vector<FftData>> H_C( + num_partitions, std::vector<FftData>(num_render_channels)); + std::vector<std::vector<FftData>> H_Neon( + num_partitions, std::vector<FftData>(num_render_channels)); + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + H_C[p][ch].Clear(); + H_Neon[p][ch].Clear(); + } + } + + for (int k = 0; k < 30; ++k) { + for (int band = 0; band < x.NumBands(); ++band) { + for (int ch = 0; ch < x.NumChannels(); ++ch) { + RandomizeSampleVector(&random_generator, x.View(band, ch)); + } + } + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + } + auto* const render_buffer = render_delay_buffer->GetRenderBuffer(); + + for (size_t j = 0; j < G.re.size(); ++j) { + G.re[j] = j / 10001.f; + } + for (size_t j = 1; j < G.im.size() - 1; ++j) { + G.im[j] = j / 20001.f; + } + G.im[0] = 0.f; + G.im[G.im.size() - 1] = 0.f; + + AdaptPartitions_Neon(*render_buffer, G, num_partitions, &H_Neon); + AdaptPartitions(*render_buffer, G, num_partitions, &H_C); + AdaptPartitions_Neon(*render_buffer, G, num_partitions, &H_Neon); + AdaptPartitions(*render_buffer, G, num_partitions, &H_C); + + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + for (size_t j = 0; j < H_C[p][ch].re.size(); ++j) { + 
EXPECT_FLOAT_EQ(H_C[p][ch].re[j], H_Neon[p][ch].re[j]); + EXPECT_FLOAT_EQ(H_C[p][ch].im[j], H_Neon[p][ch].im[j]); + } + } + } + + ApplyFilter_Neon(*render_buffer, num_partitions, H_Neon, &S_Neon); + ApplyFilter(*render_buffer, num_partitions, H_C, &S_C); + for (size_t j = 0; j < S_C.re.size(); ++j) { + EXPECT_NEAR(S_C.re[j], S_Neon.re[j], fabs(S_C.re[j] * 0.00001f)); + EXPECT_NEAR(S_C.im[j], S_Neon.im[j], fabs(S_C.re[j] * 0.00001f)); + } + } +} + +// Verifies that the optimized method for frequency response computation is +// bitexact to the reference counterpart. +TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels, + ComputeFrequencyResponseNeonOptimization) { + const size_t num_render_channels = GetParam(); + for (size_t num_partitions : {2, 5, 12, 30, 50}) { + std::vector<std::vector<FftData>> H( + num_partitions, std::vector<FftData>(num_render_channels)); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(num_partitions); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2_Neon(num_partitions); + + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + for (size_t k = 0; k < H[p][ch].re.size(); ++k) { + H[p][ch].re[k] = k + p / 3.f + ch; + H[p][ch].im[k] = p + k / 7.f - ch; + } + } + } + + ComputeFrequencyResponse(num_partitions, H, &H2); + ComputeFrequencyResponse_Neon(num_partitions, H, &H2_Neon); + + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t k = 0; k < H2[p].size(); ++k) { + EXPECT_FLOAT_EQ(H2[p][k], H2_Neon[p][k]); + } + } + } +} +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Verifies that the optimized methods for filter adaptation are bitexact to +// their reference counterparts. +TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels, + FilterAdaptationSse2Optimizations) { + const size_t num_render_channels = GetParam(); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + bool use_sse2 = (GetCPUInfo(kSSE2) != 0); + if (use_sse2) { + for (size_t num_partitions : {2, 5, 12, 30, 50}) { + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz, + num_render_channels)); + Random random_generator(42U); + Block x(kNumBands, num_render_channels); + FftData S_C; + FftData S_Sse2; + FftData G; + Aec3Fft fft; + std::vector<std::vector<FftData>> H_C( + num_partitions, std::vector<FftData>(num_render_channels)); + std::vector<std::vector<FftData>> H_Sse2( + num_partitions, std::vector<FftData>(num_render_channels)); + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + H_C[p][ch].Clear(); + H_Sse2[p][ch].Clear(); + } + } + + for (size_t k = 0; k < 500; ++k) { + for (int band = 0; band < x.NumBands(); ++band) { + for (int ch = 0; ch < x.NumChannels(); ++ch) { + RandomizeSampleVector(&random_generator, x.View(band, ch)); + } + } + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + auto* const render_buffer = render_delay_buffer->GetRenderBuffer(); + + ApplyFilter_Sse2(*render_buffer, num_partitions, H_Sse2, &S_Sse2); + ApplyFilter(*render_buffer, num_partitions, H_C, &S_C); + for (size_t j = 0; j < S_C.re.size(); ++j) { + EXPECT_FLOAT_EQ(S_C.re[j], S_Sse2.re[j]); + EXPECT_FLOAT_EQ(S_C.im[j], S_Sse2.im[j]); + } + + std::for_each(G.re.begin(), G.re.end(), + [&](float& a) { a = random_generator.Rand<float>(); }); + std::for_each(G.im.begin(), G.im.end(), + [&](float& a) { a = random_generator.Rand<float>(); }); + + AdaptPartitions_Sse2(*render_buffer, G, num_partitions, &H_Sse2); + AdaptPartitions(*render_buffer, G, num_partitions, &H_C); + + for (size_t p 
= 0; p < num_partitions; ++p) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + for (size_t j = 0; j < H_C[p][ch].re.size(); ++j) { + EXPECT_FLOAT_EQ(H_C[p][ch].re[j], H_Sse2[p][ch].re[j]); + EXPECT_FLOAT_EQ(H_C[p][ch].im[j], H_Sse2[p][ch].im[j]); + } + } + } + } + } + } +} + +// Verifies that the optimized methods for filter adaptation are bitexact to +// their reference counterparts. +TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels, + FilterAdaptationAvx2Optimizations) { + const size_t num_render_channels = GetParam(); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + bool use_avx2 = (GetCPUInfo(kAVX2) != 0); + if (use_avx2) { + for (size_t num_partitions : {2, 5, 12, 30, 50}) { + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz, + num_render_channels)); + Random random_generator(42U); + Block x(kNumBands, num_render_channels); + FftData S_C; + FftData S_Avx2; + FftData G; + Aec3Fft fft; + std::vector<std::vector<FftData>> H_C( + num_partitions, std::vector<FftData>(num_render_channels)); + std::vector<std::vector<FftData>> H_Avx2( + num_partitions, std::vector<FftData>(num_render_channels)); + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + H_C[p][ch].Clear(); + H_Avx2[p][ch].Clear(); + } + } + + for (size_t k = 0; k < 500; ++k) { + for (int band = 0; band < x.NumBands(); ++band) { + for (int ch = 0; ch < x.NumChannels(); ++ch) { + RandomizeSampleVector(&random_generator, x.View(band, ch)); + } + } + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + auto* const render_buffer = render_delay_buffer->GetRenderBuffer(); + + ApplyFilter_Avx2(*render_buffer, num_partitions, H_Avx2, &S_Avx2); + ApplyFilter(*render_buffer, num_partitions, H_C, &S_C); + for (size_t j = 0; j < S_C.re.size(); ++j) { + EXPECT_FLOAT_EQ(S_C.re[j], S_Avx2.re[j]); + EXPECT_FLOAT_EQ(S_C.im[j], S_Avx2.im[j]); + } + + std::for_each(G.re.begin(), G.re.end(), + [&](float& a) { a = random_generator.Rand<float>(); }); + std::for_each(G.im.begin(), G.im.end(), + [&](float& a) { a = random_generator.Rand<float>(); }); + + AdaptPartitions_Avx2(*render_buffer, G, num_partitions, &H_Avx2); + AdaptPartitions(*render_buffer, G, num_partitions, &H_C); + + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + for (size_t j = 0; j < H_C[p][ch].re.size(); ++j) { + EXPECT_FLOAT_EQ(H_C[p][ch].re[j], H_Avx2[p][ch].re[j]); + EXPECT_FLOAT_EQ(H_C[p][ch].im[j], H_Avx2[p][ch].im[j]); + } + } + } + } + } + } +} + +// Verifies that the optimized method for frequency response computation is +// bitexact to the reference counterpart. 
+TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels, + ComputeFrequencyResponseSse2Optimization) { + const size_t num_render_channels = GetParam(); + bool use_sse2 = (GetCPUInfo(kSSE2) != 0); + if (use_sse2) { + for (size_t num_partitions : {2, 5, 12, 30, 50}) { + std::vector<std::vector<FftData>> H( + num_partitions, std::vector<FftData>(num_render_channels)); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(num_partitions); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2_Sse2( + num_partitions); + + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + for (size_t k = 0; k < H[p][ch].re.size(); ++k) { + H[p][ch].re[k] = k + p / 3.f + ch; + H[p][ch].im[k] = p + k / 7.f - ch; + } + } + } + + ComputeFrequencyResponse(num_partitions, H, &H2); + ComputeFrequencyResponse_Sse2(num_partitions, H, &H2_Sse2); + + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t k = 0; k < H2[p].size(); ++k) { + EXPECT_FLOAT_EQ(H2[p][k], H2_Sse2[p][k]); + } + } + } + } +} + +// Verifies that the optimized method for frequency response computation is +// bitexact to the reference counterpart. +TEST_P(AdaptiveFirFilterOneTwoFourEightRenderChannels, + ComputeFrequencyResponseAvx2Optimization) { + const size_t num_render_channels = GetParam(); + bool use_avx2 = (GetCPUInfo(kAVX2) != 0); + if (use_avx2) { + for (size_t num_partitions : {2, 5, 12, 30, 50}) { + std::vector<std::vector<FftData>> H( + num_partitions, std::vector<FftData>(num_render_channels)); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(num_partitions); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2_Avx2( + num_partitions); + + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t ch = 0; ch < num_render_channels; ++ch) { + for (size_t k = 0; k < H[p][ch].re.size(); ++k) { + H[p][ch].re[k] = k + p / 3.f + ch; + H[p][ch].im[k] = p + k / 7.f - ch; + } + } + } + + ComputeFrequencyResponse(num_partitions, H, &H2); + ComputeFrequencyResponse_Avx2(num_partitions, H, &H2_Avx2); + + for (size_t p = 0; p < num_partitions; ++p) { + for (size_t k = 0; k < H2[p].size(); ++k) { + EXPECT_FLOAT_EQ(H2[p][k], H2_Avx2[p][k]); + } + } + } + } +} + +#endif + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the check for non-null data dumper works. +TEST(AdaptiveFirFilterDeathTest, NullDataDumper) { + EXPECT_DEATH(AdaptiveFirFilter(9, 9, 250, 1, DetectOptimization(), nullptr), + ""); +} + +// Verifies that the check for non-null filter output works. +TEST(AdaptiveFirFilterDeathTest, NullFilterOutput) { + ApmDataDumper data_dumper(42); + AdaptiveFirFilter filter(9, 9, 250, 1, DetectOptimization(), &data_dumper); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, 1)); + EXPECT_DEATH(filter.Filter(*render_delay_buffer->GetRenderBuffer(), nullptr), + ""); +} + +#endif + +// Verifies that the filter statistics can be accessed when filter statistics +// are turned on. +TEST(AdaptiveFirFilterTest, FilterStatisticsAccess) { + ApmDataDumper data_dumper(42); + Aec3Optimization optimization = DetectOptimization(); + AdaptiveFirFilter filter(9, 9, 250, 1, optimization, &data_dumper); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2( + filter.max_filter_size_partitions(), + std::array<float, kFftLengthBy2Plus1>()); + for (auto& H2_k : H2) { + H2_k.fill(0.f); + } + + std::array<float, kFftLengthBy2Plus1> erl; + ComputeErl(optimization, H2, erl); + filter.ComputeFrequencyResponse(&H2); +} + +// Verifies that the filter size is correctly reported. 
+TEST(AdaptiveFirFilterTest, FilterSize) { + ApmDataDumper data_dumper(42); + for (size_t filter_size = 1; filter_size < 5; ++filter_size) { + AdaptiveFirFilter filter(filter_size, filter_size, 250, 1, + DetectOptimization(), &data_dumper); + EXPECT_EQ(filter_size, filter.SizePartitions()); + } +} + +class AdaptiveFirFilterMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + AdaptiveFirFilterMultiChannel, + ::testing::Combine(::testing::Values(1, 4), + ::testing::Values(1, 8))); + +// Verifies that the filter is able to properly filter a signal and to +// adapt its coefficients. +TEST_P(AdaptiveFirFilterMultiChannel, FilterAndAdapt) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + constexpr size_t kNumBlocksToProcessPerRenderChannel = 1000; + + ApmDataDumper data_dumper(42); + EchoCanceller3Config config; + + if (num_render_channels == 33) { + config.filter.refined = {13, 0.00005f, 0.0005f, 0.0001f, 2.f, 20075344.f}; + config.filter.coarse = {13, 0.1f, 20075344.f}; + config.filter.refined_initial = {12, 0.005f, 0.5f, 0.001f, 2.f, 20075344.f}; + config.filter.coarse_initial = {12, 0.7f, 20075344.f}; + } + + AdaptiveFirFilter filter( + config.filter.refined.length_blocks, config.filter.refined.length_blocks, + config.filter.config_change_duration_blocks, num_render_channels, + DetectOptimization(), &data_dumper); + std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>> H2( + num_capture_channels, std::vector<std::array<float, kFftLengthBy2Plus1>>( + filter.max_filter_size_partitions(), + std::array<float, kFftLengthBy2Plus1>())); + std::vector<std::vector<float>> h( + num_capture_channels, + std::vector<float>( + GetTimeDomainLength(filter.max_filter_size_partitions()), 0.f)); + Aec3Fft fft; + config.delay.default_delay = 1; + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); + CoarseFilterUpdateGain gain(config.filter.coarse, + config.filter.config_change_duration_blocks); + Random random_generator(42U); + Block x(kNumBands, num_render_channels); + std::vector<float> n(kBlockSize, 0.f); + std::vector<float> y(kBlockSize, 0.f); + AecState aec_state(EchoCanceller3Config{}, num_capture_channels); + RenderSignalAnalyzer render_signal_analyzer(config); + absl::optional<DelayEstimate> delay_estimate; + std::vector<float> e(kBlockSize, 0.f); + std::array<float, kFftLength> s_scratch; + std::vector<SubtractorOutput> output(num_capture_channels); + FftData S; + FftData G; + FftData E; + std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels); + std::vector<std::array<float, kFftLengthBy2Plus1>> E2_refined( + num_capture_channels); + std::array<float, kFftLengthBy2Plus1> E2_coarse; + // [B,A] = butter(2,100/8000,'high') + constexpr CascadedBiQuadFilter::BiQuadCoefficients + kHighPassFilterCoefficients = {{0.97261f, -1.94523f, 0.97261f}, + {-1.94448f, 0.94598f}}; + for (auto& Y2_ch : Y2) { + Y2_ch.fill(0.f); + } + for (auto& E2_refined_ch : E2_refined) { + E2_refined_ch.fill(0.f); + } + E2_coarse.fill(0.f); + for (auto& subtractor_output : output) { + subtractor_output.Reset(); + } + + constexpr float kScale = 1.0f / kFftLengthBy2; + + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + std::vector<DelayBuffer<float>> delay_buffer( + num_render_channels, DelayBuffer<float>(delay_samples)); + std::vector<std::unique_ptr<CascadedBiQuadFilter>> x_hp_filter( + num_render_channels); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + x_hp_filter[ch] = std::make_unique<CascadedBiQuadFilter>( + kHighPassFilterCoefficients, 1); + } + CascadedBiQuadFilter y_hp_filter(kHighPassFilterCoefficients, 1); + 
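+    // The adaptation loop below forms the capture signal y as a delayed, +    // channel-averaged mix of the render channels plus low-level noise, so +    // the adaptive filter is expected to converge towards the simulated +    // echo path.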
SCOPED_TRACE(ProduceDebugText(num_render_channels, delay_samples)); + const size_t num_blocks_to_process = + kNumBlocksToProcessPerRenderChannel * num_render_channels; + for (size_t j = 0; j < num_blocks_to_process; ++j) { + std::fill(y.begin(), y.end(), 0.f); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + RandomizeSampleVector(&random_generator, x.View(/*band=*/0, ch)); + std::array<float, kBlockSize> y_channel; + delay_buffer[ch].Delay(x.View(/*band=*/0, ch), y_channel); + for (size_t k = 0; k < y.size(); ++k) { + y[k] += y_channel[k] / num_render_channels; + } + } + + RandomizeSampleVector(&random_generator, n); + const float noise_scaling = 1.f / 100.f / num_render_channels; + for (size_t k = 0; k < y.size(); ++k) { + y[k] += n[k] * noise_scaling; + } + + for (size_t ch = 0; ch < num_render_channels; ++ch) { + x_hp_filter[ch]->Process(x.View(/*band=*/0, ch)); + } + y_hp_filter.Process(y); + + render_delay_buffer->Insert(x); + if (j == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + auto* const render_buffer = render_delay_buffer->GetRenderBuffer(); + + render_signal_analyzer.Update(*render_buffer, + aec_state.MinDirectPathFilterDelay()); + + filter.Filter(*render_buffer, &S); + fft.Ifft(S, &s_scratch); + std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2, + e.begin(), + [&](float a, float b) { return a - b * kScale; }); + std::for_each(e.begin(), e.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + fft.ZeroPaddedFft(e, Aec3Fft::Window::kRectangular, &E); + for (auto& o : output) { + for (size_t k = 0; k < kBlockSize; ++k) { + o.s_refined[k] = kScale * s_scratch[k + kFftLengthBy2]; + } + } + + std::array<float, kFftLengthBy2Plus1> render_power; + render_buffer->SpectralSum(filter.SizePartitions(), &render_power); + gain.Compute(render_power, render_signal_analyzer, E, + filter.SizePartitions(), false, &G); + filter.Adapt(*render_buffer, G, &h[0]); + aec_state.HandleEchoPathChange(EchoPathVariability( + false, EchoPathVariability::DelayAdjustment::kNone, false)); + + filter.ComputeFrequencyResponse(&H2[0]); + aec_state.Update(delay_estimate, H2, h, *render_buffer, E2_refined, Y2, + output); + } + // Verify that the filter is able to perform well. + EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } +} + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn/moz.build new file mode 100644 index 0000000000..6f67bd6fad --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_avx2_gn/moz.build @@ -0,0 +1,190 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +CXXFLAGS += [ + "-mavx2", + "-mfma" +] + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_AVX2"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl_avx2.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_GNU_SOURCE"] = True + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + 
DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + +Library("aec3_avx2_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc new file mode 100644 index 0000000000..3ba10d5baf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_common.h" + +#include + +#include "rtc_base/checks.h" +#include "rtc_base/system/arch.h" +#include "system_wrappers/include/cpu_features_wrapper.h" + +namespace webrtc { + +Aec3Optimization DetectOptimization() { +#if defined(WEBRTC_ARCH_X86_FAMILY) + if (GetCPUInfo(kAVX2) != 0) { + return Aec3Optimization::kAvx2; + } else if (GetCPUInfo(kSSE2) != 0) { + return Aec3Optimization::kSse2; + } +#endif + +#if defined(WEBRTC_HAS_NEON) + return Aec3Optimization::kNeon; +#else + return Aec3Optimization::kNone; +#endif +} + +float FastApproxLog2f(const float in) { + RTC_DCHECK_GT(in, .0f); + // Read and interpret float as uint32_t and then cast to float. + // This is done to extract the exponent (bits 30 - 23). + // "Right shift" of the exponent is then performed by multiplying + // with the constant (1/2^23). Finally, we subtract a constant to + // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias). + union { + float dummy; + uint32_t a; + } x = {in}; + float out = x.a; + out *= 1.1920929e-7f; // 1/2^23 + out -= 126.942695f; // Remove bias. 
+ return out; +} + +float Log2TodB(const float in_log2) { + return 3.0102999566398121 * in_log2; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.h b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.h new file mode 100644 index 0000000000..32b564f14b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_ + +#include <stddef.h> + +namespace webrtc { + +#ifdef _MSC_VER /* visual c++ */ +#define ALIGN16_BEG __declspec(align(16)) +#define ALIGN16_END +#else /* gcc or icc */ +#define ALIGN16_BEG +#define ALIGN16_END __attribute__((aligned(16))) +#endif + +enum class Aec3Optimization { kNone, kSse2, kAvx2, kNeon }; + +constexpr int kNumBlocksPerSecond = 250; + +constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond; +constexpr int kMetricsComputationBlocks = 3; +constexpr int kMetricsCollectionBlocks = + kMetricsReportingIntervalBlocks - kMetricsComputationBlocks; + +constexpr size_t kFftLengthBy2 = 64; +constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1; +constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1; +constexpr size_t kFftLength = 2 * kFftLengthBy2; +constexpr size_t kFftLengthBy2Log2 = 6; + +constexpr int kRenderTransferQueueSizeFrames = 100; + +constexpr size_t kMaxNumBands = 3; +constexpr size_t kFrameSize = 160; +constexpr size_t kSubFrameLength = kFrameSize / 2; + +constexpr size_t kBlockSize = kFftLengthBy2; +constexpr size_t kBlockSizeLog2 = kFftLengthBy2Log2; + +constexpr size_t kExtendedBlockSize = 2 * kFftLengthBy2; +constexpr size_t kMatchedFilterWindowSizeSubBlocks = 32; +constexpr size_t kMatchedFilterAlignmentShiftSizeSubBlocks = + kMatchedFilterWindowSizeSubBlocks * 3 / 4; + +// TODO(peah): Integrate this with how it is done inside audio_processing_impl. +constexpr size_t NumBandsForRate(int sample_rate_hz) { + return static_cast<size_t>(sample_rate_hz / 16000); +} + +constexpr bool ValidFullBandRate(int sample_rate_hz) { + return sample_rate_hz == 16000 || sample_rate_hz == 32000 || + sample_rate_hz == 48000; +} + +constexpr int GetTimeDomainLength(int filter_length_blocks) { + return filter_length_blocks * kFftLengthBy2; +} + +constexpr size_t GetDownSampledBufferSize(size_t down_sampling_factor, + size_t num_matched_filters) { + return kBlockSize / down_sampling_factor * + (kMatchedFilterAlignmentShiftSizeSubBlocks * num_matched_filters + + kMatchedFilterWindowSizeSubBlocks + 1); +} + +constexpr size_t GetRenderDelayBufferSize(size_t down_sampling_factor, + size_t num_matched_filters, + size_t filter_length_blocks) { + return GetDownSampledBufferSize(down_sampling_factor, num_matched_filters) / + (kBlockSize / down_sampling_factor) + + filter_length_blocks + 1; +} + +// Detects what kind of optimizations to use for the code. +Aec3Optimization DetectOptimization(); + +// Computes the log2 of the input in a fast and approximate manner. +float FastApproxLog2f(float in); + +// Returns dB from a power quantity expressed in log2. 
+float Log2TodB(float in_log2); + +static_assert(1 << kBlockSizeLog2 == kBlockSize, + "Proper number of shifts for blocksize"); + +static_assert(1 << kFftLengthBy2Log2 == kFftLengthBy2, + "Proper number of shifts for the fft length"); + +static_assert(1 == NumBandsForRate(16000), "Number of bands for 16 kHz"); +static_assert(2 == NumBandsForRate(32000), "Number of bands for 32 kHz"); +static_assert(3 == NumBandsForRate(48000), "Number of bands for 48 kHz"); + +static_assert(ValidFullBandRate(16000), + "Test that 16 kHz is a valid sample rate"); +static_assert(ValidFullBandRate(32000), + "Test that 32 kHz is a valid sample rate"); +static_assert(ValidFullBandRate(48000), + "Test that 48 kHz is a valid sample rate"); +static_assert(!ValidFullBandRate(8001), + "Test that 8001 Hz is not a valid sample rate"); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common_gn/moz.build new file mode 100644 index 0000000000..b0952a7d0c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + 
DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec3_common_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc new file mode 100644 index 0000000000..9cc8016f0b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc @@ -0,0 
+1,144 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_fft.h" + +#include <algorithm> +#include <functional> +#include <iterator> + +#include "rtc_base/checks.h" +#include "system_wrappers/include/cpu_features_wrapper.h" + +namespace webrtc { + +namespace { + +const float kHanning64[kFftLengthBy2] = { + 0.f, 0.00248461f, 0.00991376f, 0.0222136f, 0.03926189f, + 0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f, + 0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f, + 0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f, + 0.70564355f, 0.75f, 0.79187184f, 0.83084292f, 0.86652594f, + 0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f, + 0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f, + 0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f, + 0.83084292f, 0.79187184f, 0.75f, 0.70564355f, 0.65924333f, + 0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f, + 0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f, + 0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f, + 0.0222136f, 0.00991376f, 0.00248461f, 0.f}; + +// Hanning window from Matlab command win = sqrt(hanning(128)). +const float kSqrtHanning128[kFftLength] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f, + 0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f, + 0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f, + 0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f, + 0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f, + 0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f, + 0.83146961230255f, 0.81758481315158f, 0.80320753148064f, 0.78834642762661f, + 0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f, + 0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 
0.65317284295378f, + 0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f, + 0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f, + 0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f, + 0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f, + 0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 0.21910124015687f, + 0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f, + 0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f}; + +bool IsSse2Available() { +#if defined(WEBRTC_ARCH_X86_FAMILY) + return GetCPUInfo(kSSE2) != 0; +#else + return false; +#endif +} + +} // namespace + +Aec3Fft::Aec3Fft() : ooura_fft_(IsSse2Available()) {} + +// TODO(peah): Change x to be std::array once the rest of the code allows this. +void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x, + Window window, + FftData* X) const { + RTC_DCHECK(X); + RTC_DCHECK_EQ(kFftLengthBy2, x.size()); + std::array<float, kFftLength> fft; + std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f); + switch (window) { + case Window::kRectangular: + std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2); + break; + case Window::kHanning: + std::transform(x.begin(), x.end(), std::begin(kHanning64), + fft.begin() + kFftLengthBy2, + [](float a, float b) { return a * b; }); + break; + case Window::kSqrtHanning: + RTC_DCHECK_NOTREACHED(); + break; + default: + RTC_DCHECK_NOTREACHED(); + } + + Fft(&fft, X); +} + +void Aec3Fft::PaddedFft(rtc::ArrayView<const float> x, + rtc::ArrayView<const float> x_old, + Window window, + FftData* X) const { + RTC_DCHECK(X); + RTC_DCHECK_EQ(kFftLengthBy2, x.size()); + RTC_DCHECK_EQ(kFftLengthBy2, x_old.size()); + std::array<float, kFftLength> fft; + + switch (window) { + case Window::kRectangular: + std::copy(x_old.begin(), x_old.end(), fft.begin()); + std::copy(x.begin(), x.end(), fft.begin() + x_old.size()); + break; + case Window::kHanning: + RTC_DCHECK_NOTREACHED(); + break; + case Window::kSqrtHanning: + std::transform(x_old.begin(), x_old.end(), std::begin(kSqrtHanning128), + fft.begin(), std::multiplies<float>()); + std::transform(x.begin(), x.end(), + std::begin(kSqrtHanning128) + x_old.size(), + fft.begin() + x_old.size(), std::multiplies<float>()); + break; + default: + RTC_DCHECK_NOTREACHED(); + } + + Fft(&fft, X); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.h b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.h new file mode 100644 index 0000000000..c68de53963 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_ + +#include <array> + +#include "api/array_view.h" +#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// Wrapper class that provides 128 point real valued FFT functionality with the +// FftData type. 
+class Aec3Fft { + public: + enum class Window { kRectangular, kHanning, kSqrtHanning }; + + Aec3Fft(); + + Aec3Fft(const Aec3Fft&) = delete; + Aec3Fft& operator=(const Aec3Fft&) = delete; + + // Computes the FFT. Note that both the input and output are modified. + void Fft(std::array<float, kFftLength>* x, FftData* X) const { + RTC_DCHECK(x); + RTC_DCHECK(X); + ooura_fft_.Fft(x->data()); + X->CopyFromPackedArray(*x); + } + // Computes the inverse Fft. + void Ifft(const FftData& X, std::array<float, kFftLength>* x) const { + RTC_DCHECK(x); + X.CopyToPackedArray(x); + ooura_fft_.InverseFft(x->data()); + } + + // Windows the input using a Hanning window, and then adds padding of + // kFftLengthBy2 initial zeros before computing the Fft. + void ZeroPaddedFft(rtc::ArrayView<const float> x, + Window window, + FftData* X) const; + + // Concatenates the kFftLengthBy2 values long x and x_old before computing the + // Fft. After that, x is copied to x_old. + void PaddedFft(rtc::ArrayView<const float> x, + rtc::ArrayView<const float> x_old, + FftData* X) const { + PaddedFft(x, x_old, Window::kRectangular, X); + } + + // Padded Fft using a time-domain window. + void PaddedFft(rtc::ArrayView<const float> x, + rtc::ArrayView<const float> x_old, + Window window, + FftData* X) const; + + private: + const OouraFft ooura_fft_; +}; + +} // namespace webrtc + +#endif  // MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_gn/moz.build new file mode 100644 index 0000000000..97bbc43539 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_gn/moz.build @@ -0,0 +1,216 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "rt"
+    ]
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_X11"] = "1"
+    DEFINES["WEBRTC_BSD"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True
+    DEFINES["NOMINMAX"] = True
+    DEFINES["NTDDI_VERSION"] = "0x0A000000"
+    DEFINES["PSAPI_VERSION"] = "2"
+    DEFINES["UNICODE"] = True
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["WEBRTC_WIN"] = True
+    DEFINES["WIN32"] = True
+    DEFINES["WIN32_LEAN_AND_MEAN"] = True
+    DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP"
+    DEFINES["WINVER"] = "0x0A00"
+    DEFINES["_ATL_NO_OPENGL"] = True
+    DEFINES["_CRT_RAND_S"] = True
+    DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True
+    DEFINES["_HAS_EXCEPTIONS"] = "0"
+    DEFINES["_HAS_NODISCARD"] = True
+    DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_SECURE_ATL"] = True
+    DEFINES["_UNICODE"] = True
+    DEFINES["_WIN32_WINNT"] = "0x0A00"
+    DEFINES["_WINDOWS"] = True
+    DEFINES["__STD_C"] = True
+
+    OS_LIBS += [
+        "crypt32",
+        "iphlpapi",
+        "secur32",
+        "winmm"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64":
+
+    DEFINES["WEBRTC_ARCH_ARM64"] = True
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec3_fft_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc new file mode 100644 index 0000000000..e60ef5b713 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_fft.h" + +#include + +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null input in Fft works. +TEST(Aec3FftDeathTest, NullFftInput) { + Aec3Fft fft; + FftData X; + EXPECT_DEATH(fft.Fft(nullptr, &X), ""); +} + +// Verifies that the check for non-null input in Fft works. +TEST(Aec3FftDeathTest, NullFftOutput) { + Aec3Fft fft; + std::array x; + EXPECT_DEATH(fft.Fft(&x, nullptr), ""); +} + +// Verifies that the check for non-null output in Ifft works. +TEST(Aec3FftDeathTest, NullIfftOutput) { + Aec3Fft fft; + FftData X; + EXPECT_DEATH(fft.Ifft(X, nullptr), ""); +} + +// Verifies that the check for non-null output in ZeroPaddedFft works. 
+TEST(Aec3FftDeathTest, NullZeroPaddedFftOutput) {
+  Aec3Fft fft;
+  std::array<float, kFftLengthBy2> x;
+  EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, nullptr),
+               "");
+}
+
+// Verifies that the check for input length in ZeroPaddedFft works.
+TEST(Aec3FftDeathTest, ZeroPaddedFftWrongInputLength) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2 - 1> x;
+  EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X), "");
+}
+
+// Verifies that the check for non-null output in PaddedFft works.
+TEST(Aec3FftDeathTest, NullPaddedFftOutput) {
+  Aec3Fft fft;
+  std::array<float, kFftLengthBy2> x;
+  std::array<float, kFftLengthBy2> x_old;
+  EXPECT_DEATH(fft.PaddedFft(x, x_old, nullptr), "");
+}
+
+// Verifies that the check for input length in PaddedFft works.
+TEST(Aec3FftDeathTest, PaddedFftWrongInputLength) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2 - 1> x;
+  std::array<float, kFftLengthBy2> x_old;
+  EXPECT_DEATH(fft.PaddedFft(x, x_old, &X), "");
+}
+
+// Verifies that the check for length in the old value in PaddedFft works.
+TEST(Aec3FftDeathTest, PaddedFftWrongOldValuesLength) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2> x;
+  std::array<float, kFftLengthBy2 - 1> x_old;
+  EXPECT_DEATH(fft.PaddedFft(x, x_old, &X), "");
+}
+
+#endif
+
+// Verifies that Fft works as intended.
+TEST(Aec3Fft, Fft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLength> x;
+  x.fill(0.f);
+  fft.Fft(&x, &X);
+  EXPECT_THAT(X.re, ::testing::Each(0.f));
+  EXPECT_THAT(X.im, ::testing::Each(0.f));
+
+  x.fill(0.f);
+  x[0] = 1.f;
+  fft.Fft(&x, &X);
+  EXPECT_THAT(X.re, ::testing::Each(1.f));
+  EXPECT_THAT(X.im, ::testing::Each(0.f));
+
+  x.fill(1.f);
+  fft.Fft(&x, &X);
+  EXPECT_EQ(128.f, X.re[0]);
+  std::for_each(X.re.begin() + 1, X.re.end(),
+                [](float a) { EXPECT_EQ(0.f, a); });
+  EXPECT_THAT(X.im, ::testing::Each(0.f));
+}
+
+// Verifies that InverseFft works as intended.
+TEST(Aec3Fft, Ifft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLength> x;
+
+  X.re.fill(0.f);
+  X.im.fill(0.f);
+  fft.Ifft(X, &x);
+  EXPECT_THAT(x, ::testing::Each(0.f));
+
+  X.re.fill(1.f);
+  X.im.fill(0.f);
+  fft.Ifft(X, &x);
+  EXPECT_EQ(64.f, x[0]);
+  std::for_each(x.begin() + 1, x.end(), [](float a) { EXPECT_EQ(0.f, a); });
+
+  X.re.fill(0.f);
+  X.re[0] = 128;
+  X.im.fill(0.f);
+  fft.Ifft(X, &x);
+  EXPECT_THAT(x, ::testing::Each(64.f));
+}
+
+// Verifies that Fft and InverseFft work as intended.
+TEST(Aec3Fft, FftAndIfft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLength> x;
+  std::array<float, kFftLength> x_ref;
+
+  int v = 0;
+  for (int k = 0; k < 20; ++k) {
+    for (size_t j = 0; j < x.size(); ++j) {
+      x[j] = v++;
+      x_ref[j] = x[j] * 64.f;
+    }
+    fft.Fft(&x, &X);
+    fft.Ifft(X, &x);
+    for (size_t j = 0; j < x.size(); ++j) {
+      EXPECT_NEAR(x_ref[j], x[j], 0.001f);
+    }
+  }
+}
+
+// Verifies that ZeroPaddedFft works as intended.
+TEST(Aec3Fft, ZeroPaddedFft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2> x_in;
+  std::array<float, kFftLength> x_ref;
+  std::array<float, kFftLength> x_out;
+
+  int v = 0;
+  x_ref.fill(0.f);
+  for (int k = 0; k < 20; ++k) {
+    for (size_t j = 0; j < x_in.size(); ++j) {
+      x_in[j] = v++;
+      x_ref[j + kFftLengthBy2] = x_in[j] * 64.f;
+    }
+    fft.ZeroPaddedFft(x_in, Aec3Fft::Window::kRectangular, &X);
+    fft.Ifft(X, &x_out);
+    for (size_t j = 0; j < x_out.size(); ++j) {
+      EXPECT_NEAR(x_ref[j], x_out[j], 0.1f);
+    }
+  }
+}
+
+// Verifies that PaddedFft works as intended.
+TEST(Aec3Fft, PaddedFft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2> x_in;
+  std::array<float, kFftLength> x_out;
+  std::array<float, kFftLengthBy2> x_old;
+  std::array<float, kFftLengthBy2> x_old_ref;
+  std::array<float, kFftLength> x_ref;
+
+  int v = 0;
+  x_old.fill(0.f);
+  for (int k = 0; k < 20; ++k) {
+    for (size_t j = 0; j < x_in.size(); ++j) {
+      x_in[j] = v++;
+    }
+
+    std::copy(x_old.begin(), x_old.end(), x_ref.begin());
+    std::copy(x_in.begin(), x_in.end(), x_ref.begin() + kFftLengthBy2);
+    std::copy(x_in.begin(), x_in.end(), x_old_ref.begin());
+    std::for_each(x_ref.begin(), x_ref.end(), [](float& a) { a *= 64.f; });
+
+    fft.PaddedFft(x_in, x_old, &X);
+    std::copy(x_in.begin(), x_in.end(), x_old.begin());
+    fft.Ifft(X, &x_out);
+
+    for (size_t j = 0; j < x_out.size(); ++j) {
+      EXPECT_NEAR(x_ref[j], x_out[j], 0.1f);
+    }
+
+    EXPECT_EQ(x_old_ref, x_old);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec3_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_gn/moz.build
new file mode 100644
index 0000000000..6646d41ff3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec3_gn/moz.build
@@ -0,0 +1,289 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0"
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/aec3_common.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/aec3_fft.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc",
+    "/third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc",
"/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc", + "/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = 
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "rt"
+    ]
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_X11"] = "1"
+    DEFINES["WEBRTC_BSD"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True
+    DEFINES["NOMINMAX"] = True
+    DEFINES["NTDDI_VERSION"] = "0x0A000000"
+    DEFINES["PSAPI_VERSION"] = "2"
+    DEFINES["UNICODE"] = True
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["WEBRTC_WIN"] = True
+    DEFINES["WIN32"] = True
+    DEFINES["WIN32_LEAN_AND_MEAN"] = True
+    DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP"
+    DEFINES["WINVER"] = "0x0A00"
+    DEFINES["_ATL_NO_OPENGL"] = True
+    DEFINES["_CRT_RAND_S"] = True
+    DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True
+    DEFINES["_HAS_EXCEPTIONS"] = "0"
+    DEFINES["_HAS_NODISCARD"] = True
+    DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_SECURE_ATL"] = True
+    DEFINES["_UNICODE"] = True
+    DEFINES["_WIN32_WINNT"] = "0x0A00"
+    DEFINES["_WINDOWS"] = True
+    DEFINES["__STD_C"] = True
+
+    OS_LIBS += [
+        "crypt32",
+        "iphlpapi",
+        "secur32",
+        "winmm"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64":
+
+    DEFINES["WEBRTC_ARCH_ARM64"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "arm":
+
+    CXXFLAGS += [
+        "-mfpu=neon"
+    ]
+
+    DEFINES["WEBRTC_ARCH_ARM"] = True
+    DEFINES["WEBRTC_ARCH_ARM_V7"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "mips32":
+
+    DEFINES["MIPS32_LE"] = True
+    DEFINES["MIPS_FPU_LE"] = True
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "mips64":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD":
+
DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec3_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc new file mode 100644 index 0000000000..81fd91fab9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.cc @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec_state.h" + +#include + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +bool DeactivateInitialStateResetAtEchoPathChange() { + return field_trial::IsEnabled( + "WebRTC-Aec3DeactivateInitialStateResetKillSwitch"); +} + +bool FullResetAtEchoPathChange() { + return !field_trial::IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch"); +} + +bool SubtractorAnalyzerResetAtEchoPathChange() { + return !field_trial::IsEnabled( + "WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch"); +} + +void ComputeAvgRenderReverb( + const SpectrumBuffer& spectrum_buffer, + int delay_blocks, + float reverb_decay, + ReverbModel* reverb_model, + rtc::ArrayView reverb_power_spectrum) { + RTC_DCHECK(reverb_model); + const size_t num_render_channels = spectrum_buffer.buffer[0].size(); + int idx_at_delay = + spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks); + int idx_past = spectrum_buffer.IncIndex(idx_at_delay); + + std::array X2_data; + rtc::ArrayView X2; + if (num_render_channels > 1) { + auto average_channels = + [](size_t num_render_channels, + rtc::ArrayView> + spectrum_band_0, + rtc::ArrayView render_power) { + std::fill(render_power.begin(), render_power.end(), 0.f); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power[k] += spectrum_band_0[ch][k]; + } + } + const float normalizer = 1.f / num_render_channels; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power[k] *= normalizer; + } + }; + average_channels(num_render_channels, 
+                     X2_data);
+    reverb_model->UpdateReverbNoFreqShaping(
+        X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay);
+
+    average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay],
+                     X2_data);
+    X2 = X2_data;
+  } else {
+    reverb_model->UpdateReverbNoFreqShaping(
+        spectrum_buffer.buffer[idx_past][/*channel=*/0],
+        /*power_spectrum_scaling=*/1.0f, reverb_decay);
+
+    X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
+  }
+
+  rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
+      reverb_model->reverb();
+  for (size_t k = 0; k < X2.size(); ++k) {
+    reverb_power_spectrum[k] = X2[k] + reverb_power[k];
+  }
+}
+
+}  // namespace
+
+std::atomic<int> AecState::instance_count_(0);
+
+void AecState::GetResidualEchoScaling(
+    rtc::ArrayView<float> residual_scaling) const {
+  bool filter_has_had_time_to_converge;
+  if (config_.filter.conservative_initial_phase) {
+    filter_has_had_time_to_converge =
+        strong_not_saturated_render_blocks_ >= 1.5f * kNumBlocksPerSecond;
+  } else {
+    filter_has_had_time_to_converge =
+        strong_not_saturated_render_blocks_ >= 0.8f * kNumBlocksPerSecond;
+  }
+  echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge,
+                                          residual_scaling);
+}
+
+AecState::AecState(const EchoCanceller3Config& config,
+                   size_t num_capture_channels)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      config_(config),
+      num_capture_channels_(num_capture_channels),
+      deactivate_initial_state_reset_at_echo_path_change_(
+          DeactivateInitialStateResetAtEchoPathChange()),
+      full_reset_at_echo_path_change_(FullResetAtEchoPathChange()),
+      subtractor_analyzer_reset_at_echo_path_change_(
+          SubtractorAnalyzerResetAtEchoPathChange()),
+      initial_state_(config_),
+      delay_state_(config_, num_capture_channels_),
+      transparent_state_(TransparentMode::Create(config_)),
+      filter_quality_state_(config_, num_capture_channels_),
+      erl_estimator_(2 * kNumBlocksPerSecond),
+      erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels_),
+      filter_analyzer_(config_, num_capture_channels_),
+      echo_audibility_(
+          config_.echo_audibility.use_stationarity_properties_at_init),
+      reverb_model_estimator_(config_, num_capture_channels_),
+      subtractor_output_analyzer_(num_capture_channels_) {}
+
+AecState::~AecState() = default;
+
+void AecState::HandleEchoPathChange(
+    const EchoPathVariability& echo_path_variability) {
+  const auto full_reset = [&]() {
+    filter_analyzer_.Reset();
+    capture_signal_saturation_ = false;
+    strong_not_saturated_render_blocks_ = 0;
+    blocks_with_active_render_ = 0;
+    if (!deactivate_initial_state_reset_at_echo_path_change_) {
+      initial_state_.Reset();
+    }
+    if (transparent_state_) {
+      transparent_state_->Reset();
+    }
+    erle_estimator_.Reset(true);
+    erl_estimator_.Reset();
+    filter_quality_state_.Reset();
+  };
+
+  // TODO(peah): Refine the reset scheme according to the type of gain and
+  // delay adjustment.
+
+  if (full_reset_at_echo_path_change_ &&
+      echo_path_variability.delay_change !=
+          EchoPathVariability::DelayAdjustment::kNone) {
+    full_reset();
+  } else if (echo_path_variability.gain_change) {
+    erle_estimator_.Reset(false);
+  }
+  if (subtractor_analyzer_reset_at_echo_path_change_) {
+    subtractor_output_analyzer_.HandleEchoPathChange();
+  }
+}
+
+void AecState::Update(
+    const absl::optional<DelayEstimate>& external_delay,
+    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
+        adaptive_filter_frequency_responses,
+    rtc::ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,
+    const RenderBuffer& render_buffer,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+    rtc::ArrayView<const SubtractorOutput> subtractor_output) {
+  RTC_DCHECK_EQ(num_capture_channels_, Y2.size());
+  RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size());
+  RTC_DCHECK_EQ(num_capture_channels_,
+                adaptive_filter_frequency_responses.size());
+  RTC_DCHECK_EQ(num_capture_channels_,
+                adaptive_filter_impulse_responses.size());
+
+  // Analyze the filter outputs and filters.
+  bool any_filter_converged;
+  bool any_coarse_filter_converged;
+  bool all_filters_diverged;
+  subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
+                                     &any_coarse_filter_converged,
+                                     &all_filters_diverged);
+
+  bool any_filter_consistent;
+  float max_echo_path_gain;
+  filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer,
+                          &any_filter_consistent, &max_echo_path_gain);
+
+  // Estimate the direct path delay of the filter.
+  if (config_.filter.use_linear_filter) {
+    delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay,
+                        strong_not_saturated_render_blocks_);
+  }
+
+  const Block& aligned_render_block =
+      render_buffer.GetBlock(-delay_state_.MinDirectPathFilterDelay());
+
+  // Update render counters.
+  bool active_render = false;
+  for (int ch = 0; ch < aligned_render_block.NumChannels(); ++ch) {
+    const float render_energy =
+        std::inner_product(aligned_render_block.begin(/*block=*/0, ch),
+                           aligned_render_block.end(/*block=*/0, ch),
+                           aligned_render_block.begin(/*block=*/0, ch), 0.f);
+    if (render_energy > (config_.render_levels.active_render_limit *
+                         config_.render_levels.active_render_limit) *
+                            kFftLengthBy2) {
+      active_render = true;
+      break;
+    }
+  }
+  blocks_with_active_render_ += active_render ? 1 : 0;
+  strong_not_saturated_render_blocks_ +=
+      active_render && !SaturatedCapture() ? 1 : 0;
+
+  std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;
+
+  ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
+                         delay_state_.MinDirectPathFilterDelay(),
+                         ReverbDecay(/*mild=*/false), &avg_render_reverb_,
+                         avg_render_spectrum_with_reverb);
+
+  if (config_.echo_audibility.use_stationarity_properties) {
+    // Update the echo audibility evaluator.
+    echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(),
+                            delay_state_.MinDirectPathFilterDelay(),
+                            delay_state_.ExternalDelayReported());
+  }
+
+  // Update the ERL and ERLE measures.
+  if (initial_state_.TransitionTriggered()) {
+    erle_estimator_.Reset(false);
+  }
+
+  erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses,
+                         avg_render_spectrum_with_reverb, Y2, E2_refined,
+                         subtractor_output_analyzer_.ConvergedFilters());
+
+  erl_estimator_.Update(
+      subtractor_output_analyzer_.ConvergedFilters(),
+      render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2);
+
+  // Detect and flag echo saturation.
+  if (config_.ep_strength.echo_can_saturate) {
+    saturation_detector_.Update(aligned_render_block, SaturatedCapture(),
+                                UsableLinearEstimate(), subtractor_output,
+                                max_echo_path_gain);
+  } else {
+    RTC_DCHECK(!saturation_detector_.SaturatedEcho());
+  }
+
+  // Update the decision on whether to use the initial state parameter set.
+  initial_state_.Update(active_render, SaturatedCapture());
+
+  // Detect whether the transparent mode should be activated.
+  if (transparent_state_) {
+    transparent_state_->Update(
+        delay_state_.MinDirectPathFilterDelay(), any_filter_consistent,
+        any_filter_converged, any_coarse_filter_converged,
+        all_filters_diverged, active_render, SaturatedCapture());
+  }
+
+  // Analyze the quality of the filter.
+  filter_quality_state_.Update(active_render, TransparentModeActive(),
+                               SaturatedCapture(), external_delay,
+                               any_filter_converged);
+
+  // Update the reverb estimate.
+  const bool stationary_block =
+      config_.echo_audibility.use_stationarity_properties &&
+      echo_audibility_.IsBlockStationary();
+
+  reverb_model_estimator_.Update(
+      filter_analyzer_.GetAdjustedFilters(),
+      adaptive_filter_frequency_responses,
+      erle_estimator_.GetInstLinearQualityEstimates(),
+      delay_state_.DirectPathFilterDelays(),
+      filter_quality_state_.UsableLinearFilterOutputs(), stationary_block);
+
+  erle_estimator_.Dump(data_dumper_);
+  reverb_model_estimator_.Dump(data_dumper_.get());
+  data_dumper_->DumpRaw("aec3_active_render", active_render);
+  data_dumper_->DumpRaw("aec3_erl", Erl());
+  data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
+  data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]);
+  data_dumper_->DumpRaw("aec3_erle_onset_compensated",
+                        Erle(/*onset_compensated=*/true)[0]);
+  data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
+  data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
+  data_dumper_->DumpRaw("aec3_filter_delay",
+                        filter_analyzer_.MinFilterDelayBlocks());
+
+  data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent);
+  data_dumper_->DumpRaw("aec3_initial_state",
+                        initial_state_.InitialStateActive());
+  data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
+  data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
+  data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
+  data_dumper_->DumpRaw("aec3_any_coarse_filter_converged",
+                        any_coarse_filter_converged);
+  data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);
+
+  data_dumper_->DumpRaw("aec3_external_delay_avaliable",
+                        external_delay ? 1 : 0);
+  data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
+                        GetReverbFrequencyResponse());
+  data_dumper_->DumpRaw("aec3_subtractor_y2", subtractor_output[0].y2);
+  data_dumper_->DumpRaw("aec3_subtractor_e2_coarse",
+                        subtractor_output[0].e2_coarse);
+  data_dumper_->DumpRaw("aec3_subtractor_e2_refined",
+                        subtractor_output[0].e2_refined);
+}
+
+AecState::InitialState::InitialState(const EchoCanceller3Config& config)
+    : conservative_initial_phase_(config.filter.conservative_initial_phase),
+      initial_state_seconds_(config.filter.initial_state_seconds) {
+  Reset();
+}
+void AecState::InitialState::InitialState::Reset() {
+  initial_state_ = true;
+  strong_not_saturated_render_blocks_ = 0;
+}
+void AecState::InitialState::InitialState::Update(bool active_render,
+                                                  bool saturated_capture) {
+  strong_not_saturated_render_blocks_ +=
+      active_render && !saturated_capture ? 1 : 0;
+
+  // Flag whether the initial state is still active.
+  bool prev_initial_state = initial_state_;
+  if (conservative_initial_phase_) {
+    initial_state_ =
+        strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond;
+  } else {
+    initial_state_ = strong_not_saturated_render_blocks_ <
+                     initial_state_seconds_ * kNumBlocksPerSecond;
+  }
+
+  // Flag whether the transition from the initial state has started.
+  transition_triggered_ = !initial_state_ && prev_initial_state;
+}
+
+AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config,
+                                   size_t num_capture_channels)
+    : delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
+      filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_),
+      min_filter_delay_(delay_headroom_blocks_) {}
+
+void AecState::FilterDelay::Update(
+    rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
+    const absl::optional<DelayEstimate>& external_delay,
+    size_t blocks_with_proper_filter_adaptation) {
+  // Update the delay based on the external delay.
+  if (external_delay &&
+      (!external_delay_ || external_delay_->delay != external_delay->delay)) {
+    external_delay_ = external_delay;
+    external_delay_reported_ = true;
+  }
+
+  // Override the estimated delay if it is not certain that the filter has had
+  // time to converge.
+  const bool delay_estimator_may_not_have_converged =
+      blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
+  if (delay_estimator_may_not_have_converged && external_delay_) {
+    const int delay_guess = delay_headroom_blocks_;
+    std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(),
+              delay_guess);
+  } else {
+    RTC_DCHECK_EQ(filter_delays_blocks_.size(),
+                  analyzer_filter_delay_estimates_blocks.size());
+    std::copy(analyzer_filter_delay_estimates_blocks.begin(),
+              analyzer_filter_delay_estimates_blocks.end(),
+              filter_delays_blocks_.begin());
+  }
+
+  min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(),
+                                        filter_delays_blocks_.end());
+}
+
+AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer(
+    const EchoCanceller3Config& config,
+    size_t num_capture_channels)
+    : use_linear_filter_(config.filter.use_linear_filter),
+      usable_linear_filter_estimates_(num_capture_channels, false) {}
+
+void AecState::FilteringQualityAnalyzer::Reset() {
+  std::fill(usable_linear_filter_estimates_.begin(),
+            usable_linear_filter_estimates_.end(), false);
+  overall_usable_linear_estimates_ = false;
+  filter_update_blocks_since_reset_ = 0;
+}
+
+void AecState::FilteringQualityAnalyzer::Update(
+    bool active_render,
+    bool transparent_mode,
+    bool saturated_capture,
+    const absl::optional<DelayEstimate>& external_delay,
+    bool any_filter_converged) {
+  // Update blocks counter.
+  const bool filter_update = active_render && !saturated_capture;
+  filter_update_blocks_since_reset_ += filter_update ? 1 : 0;
+  filter_update_blocks_since_start_ += filter_update ? 1 : 0;
+
+  // Store convergence flag when observed.
+  convergence_seen_ = convergence_seen_ || any_filter_converged;
+
+  // Verify requirements for achieving a decent filter. The requirements for
+  // filter adaptation at call startup are more restrictive than after an
+  // in-call reset.
+  const bool sufficient_data_to_converge_at_startup =
+      filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f;
+  const bool sufficient_data_to_converge_at_reset =
+      sufficient_data_to_converge_at_startup &&
+      filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f;
+
+  // The linear filter can only be used if it has had time to converge.
+  overall_usable_linear_estimates_ = sufficient_data_to_converge_at_startup &&
+                                     sufficient_data_to_converge_at_reset;
+
+  // The linear filter can only be used if an external delay or convergence
+  // has been identified.
+  overall_usable_linear_estimates_ =
+      overall_usable_linear_estimates_ && (external_delay || convergence_seen_);
+
+  // If transparent mode is on, deactivate using the linear filter.
+  overall_usable_linear_estimates_ =
+      overall_usable_linear_estimates_ && !transparent_mode;
+
+  if (use_linear_filter_) {
+    std::fill(usable_linear_filter_estimates_.begin(),
+              usable_linear_filter_estimates_.end(),
+              overall_usable_linear_estimates_);
+  }
+}
+
+void AecState::SaturationDetector::Update(
+    const Block& x,
+    bool saturated_capture,
+    bool usable_linear_estimate,
+    rtc::ArrayView<const SubtractorOutput> subtractor_output,
+    float echo_path_gain) {
+  saturated_echo_ = false;
+  if (!saturated_capture) {
+    return;
+  }
+
+  if (usable_linear_estimate) {
+    constexpr float kSaturationThreshold = 20000.f;
+    for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
+      saturated_echo_ =
+          saturated_echo_ ||
+          (subtractor_output[ch].s_refined_max_abs > kSaturationThreshold ||
+           subtractor_output[ch].s_coarse_max_abs > kSaturationThreshold);
+    }
+  } else {
+    float max_sample = 0.f;
+    for (int ch = 0; ch < x.NumChannels(); ++ch) {
+      rtc::ArrayView<const float> x_ch = x.View(/*band=*/0, ch);
+      for (float sample : x_ch) {
+        max_sample = std::max(max_sample, fabsf(sample));
+      }
+    }
+
+    const float kMargin = 10.f;
+    float peak_echo_amplitude = max_sample * echo_path_gain * kMargin;
+    saturated_echo_ = saturated_echo_ || peak_echo_amplitude > 32000;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.h b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.h
new file mode 100644
index 0000000000..a39325c8b8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state.h
@@ -0,0 +1,300 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <atomic>
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/echo_audibility.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/erl_estimator.h"
+#include "modules/audio_processing/aec3/erle_estimator.h"
+#include "modules/audio_processing/aec3/filter_analyzer.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/reverb_model_estimator.h"
+#include "modules/audio_processing/aec3/subtractor_output.h"
+#include "modules/audio_processing/aec3/subtractor_output_analyzer.h"
+#include "modules/audio_processing/aec3/transparent_mode.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+// Handles the state and the conditions for the echo removal functionality.
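+//
+// [Editor's illustration, not part of the upstream file: a hypothetical
+// per-block driver for the class below, with all inputs assumed to come from
+// the surrounding AEC3 pipeline (subtractor, render buffering, delay
+// estimation):
+//
+//   AecState aec_state(config, num_capture_channels);
+//   // On a reported echo path change:
+//   aec_state.HandleEchoPathChange(echo_path_variability);
+//   // Once per processed block:
+//   aec_state.Update(external_delay, filter_frequency_responses,
+//                    filter_impulse_responses, render_buffer, E2_refined, Y2,
+//                    subtractor_output);
+//   if (aec_state.UsableLinearEstimate()) {
+//     // The linear filter output may drive residual echo estimation.
+//   }]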
+class AecState {
+ public:
+  AecState(const EchoCanceller3Config& config, size_t num_capture_channels);
+  ~AecState();
+
+  // Returns whether the echo subtractor can be used to determine the residual
+  // echo.
+  bool UsableLinearEstimate() const {
+    return filter_quality_state_.LinearFilterUsable() &&
+           config_.filter.use_linear_filter;
+  }
+
+  // Returns whether the echo subtractor output should be used as output.
+  bool UseLinearFilterOutput() const {
+    return filter_quality_state_.LinearFilterUsable() &&
+           config_.filter.use_linear_filter;
+  }
+
+  // Returns whether the render signal is currently active.
+  bool ActiveRender() const { return blocks_with_active_render_ > 200; }
+
+  // Returns the appropriate scaling of the residual echo to match the
+  // audibility.
+  void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const;
+
+  // Returns whether the stationary properties of the signals are used in the
+  // aec.
+  bool UseStationarityProperties() const {
+    return config_.echo_audibility.use_stationarity_properties;
+  }
+
+  // Returns the ERLE.
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
+      bool onset_compensated) const {
+    return erle_estimator_.Erle(onset_compensated);
+  }
+
+  // Returns the non-capped ERLE.
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
+      const {
+    return erle_estimator_.ErleUnbounded();
+  }
+
+  // Returns the fullband ERLE estimate in log2 units.
+  float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }
+
+  // Returns the ERL.
+  const std::array<float, kFftLengthBy2Plus1>& Erl() const {
+    return erl_estimator_.Erl();
+  }
+
+  // Returns the time-domain ERL.
+  float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }
+
+  // Returns the delay estimate based on the linear filter.
+  int MinDirectPathFilterDelay() const {
+    return delay_state_.MinDirectPathFilterDelay();
+  }
+
+  // Returns whether the capture signal is saturated.
+  bool SaturatedCapture() const { return capture_signal_saturation_; }
+
+  // Returns whether the echo signal is saturated.
+  bool SaturatedEcho() const { return saturation_detector_.SaturatedEcho(); }
+
+  // Updates the capture signal saturation.
+  void UpdateCaptureSaturation(bool capture_signal_saturation) {
+    capture_signal_saturation_ = capture_signal_saturation;
+  }
+
+  // Returns whether the transparent mode is active.
+  bool TransparentModeActive() const {
+    return transparent_state_ && transparent_state_->Active();
+  }
+
+  // Takes appropriate action at an echo path change.
+  void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
+
+  // Returns the decay factor for the echo reverberation. The parameter `mild`
+  // indicates which exponential decay to return: the default one, or a milder
+  // one that can be used during nearend regions.
+  float ReverbDecay(bool mild) const {
+    return reverb_model_estimator_.ReverbDecay(mild);
+  }
+
+  // Return the frequency response of the reverberant echo.
+  rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
+    return reverb_model_estimator_.GetReverbFrequencyResponse();
+  }
+
+  // Returns whether the transition for going out of the initial state has
+  // been triggered.
+  bool TransitionTriggered() const {
+    return initial_state_.TransitionTriggered();
+  }
+
+  // Updates the aec state.
+  // TODO(bugs.webrtc.org/10913): Compute multi-channel ERL.
+  void Update(
+      const absl::optional<DelayEstimate>& external_delay,
+      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
+          adaptive_filter_frequency_responses,
+      rtc::ArrayView<const std::vector<float>>
+          adaptive_filter_impulse_responses,
+      const RenderBuffer& render_buffer,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+      rtc::ArrayView<const SubtractorOutput> subtractor_output);
+
+  // Returns filter length in blocks.
+  int FilterLengthBlocks() const {
+    // All filters have the same length, so arbitrarily return channel 0
+    // length.
+    return filter_analyzer_.FilterLengthBlocks();
+  }
+
+ private:
+  static std::atomic<int> instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const EchoCanceller3Config config_;
+  const size_t num_capture_channels_;
+  const bool deactivate_initial_state_reset_at_echo_path_change_;
+  const bool full_reset_at_echo_path_change_;
+  const bool subtractor_analyzer_reset_at_echo_path_change_;
+
+  // Class for controlling the transition from the initial state, which in
+  // turn controls when the filter parameters for the initial state should be
+  // used.
+  class InitialState {
+   public:
+    explicit InitialState(const EchoCanceller3Config& config);
+    // Resets the state to again begin in the initial state.
+    void Reset();
+
+    // Updates the state based on new data.
+    void Update(bool active_render, bool saturated_capture);
+
+    // Returns whether the initial state is active or not.
+    bool InitialStateActive() const { return initial_state_; }
+
+    // Returns whether the transition from the initial state has started.
+    bool TransitionTriggered() const { return transition_triggered_; }
+
+   private:
+    const bool conservative_initial_phase_;
+    const float initial_state_seconds_;
+    bool transition_triggered_ = false;
+    bool initial_state_ = true;
+    size_t strong_not_saturated_render_blocks_ = 0;
+  } initial_state_;
+
+  // Class for choosing the direct-path delay relative to the beginning of the
+  // filter, as well as any other data related to the delay used within
+  // AecState.
+  class FilterDelay {
+   public:
+    FilterDelay(const EchoCanceller3Config& config,
+                size_t num_capture_channels);
+
+    // Returns whether an external delay has been reported to the AecState
+    // (from the delay estimator).
+    bool ExternalDelayReported() const { return external_delay_reported_; }
+
+    // Returns the delay in blocks relative to the beginning of the filter
+    // that corresponds to the direct path of the echo.
+    rtc::ArrayView<const int> DirectPathFilterDelays() const {
+      return filter_delays_blocks_;
+    }
+
+    // Returns the minimum delay among the direct path delays relative to the
+    // beginning of the filter.
+    int MinDirectPathFilterDelay() const { return min_filter_delay_; }
+
+    // Updates the delay estimates based on new data.
+    void Update(
+        rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
+        const absl::optional<DelayEstimate>& external_delay,
+        size_t blocks_with_proper_filter_adaptation);
+
+   private:
+    const int delay_headroom_blocks_;
+    bool external_delay_reported_ = false;
+    std::vector<int> filter_delays_blocks_;
+    int min_filter_delay_;
+    absl::optional<DelayEstimate> external_delay_;
+  } delay_state_;
+
+  // Classifier for toggling transparent mode when there is no echo.
+  std::unique_ptr<TransparentMode> transparent_state_;
+
+  // Class for analyzing how well the linear filter is, and can be expected
+  // to, perform on the current signals. The purpose of this is to select the
+  // echo suppression functionality as well as the input to the echo
+  // suppressor.
+  class FilteringQualityAnalyzer {
+   public:
+    FilteringQualityAnalyzer(const EchoCanceller3Config& config,
+                             size_t num_capture_channels);
+
+    // Returns whether the linear filter can be used for the echo
+    // canceller output.
+    bool LinearFilterUsable() const { return overall_usable_linear_estimates_; }
+
+    // Returns whether an individual filter output can be used for the echo
+    // canceller output.
+    const std::vector<bool>& UsableLinearFilterOutputs() const {
+      return usable_linear_filter_estimates_;
+    }
+
+    // Resets the state of the analyzer.
+    void Reset();
+
+    // Updates the analysis based on new data.
+    void Update(bool active_render,
+                bool transparent_mode,
+                bool saturated_capture,
+                const absl::optional<DelayEstimate>& external_delay,
+                bool any_filter_converged);
+
+   private:
+    const bool use_linear_filter_;
+    bool overall_usable_linear_estimates_ = false;
+    size_t filter_update_blocks_since_reset_ = 0;
+    size_t filter_update_blocks_since_start_ = 0;
+    bool convergence_seen_ = false;
+    std::vector<bool> usable_linear_filter_estimates_;
+  } filter_quality_state_;
+
+  // Class for detecting whether the echo is to be considered to be
+  // saturated.
+  class SaturationDetector {
+   public:
+    // Returns whether the echo is to be considered saturated.
+    bool SaturatedEcho() const { return saturated_echo_; }
+
+    // Updates the detection decision based on new data.
+    void Update(const Block& x,
+                bool saturated_capture,
+                bool usable_linear_estimate,
+                rtc::ArrayView<const SubtractorOutput> subtractor_output,
+                float echo_path_gain);
+
+   private:
+    bool saturated_echo_ = false;
+  } saturation_detector_;
+
+  ErlEstimator erl_estimator_;
+  ErleEstimator erle_estimator_;
+  size_t strong_not_saturated_render_blocks_ = 0;
+  size_t blocks_with_active_render_ = 0;
+  bool capture_signal_saturation_ = false;
+  FilterAnalyzer filter_analyzer_;
+  EchoAudibility echo_audibility_;
+  ReverbModelEstimator reverb_model_estimator_;
+  ReverbModel avg_render_reverb_;
+  SubtractorOutputAnalyzer subtractor_output_analyzer_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state_unittest.cc
new file mode 100644
index 0000000000..6662c8fb1a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -0,0 +1,297 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/aec_state.h"
+
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+void RunNormalUsageTest(size_t num_render_channels,
+                        size_t num_capture_channels) {
+  // TODO(bugs.webrtc.org/10913): Test with different content in different
+  // channels.
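+  // [Editor's note, not part of the upstream file: in the expectations below,
+  // ERL is the render-to-capture echo power ratio X2/Y2 and ERLE the
+  // echo-to-residual power ratio Y2/E2, both estimated per frequency bin; the
+  // render signal is constructed so that the expected values alternate
+  // between even and odd bins.]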
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  ApmDataDumper data_dumper(42);
+  EchoCanceller3Config config;
+  AecState state(config, num_capture_channels);
+  absl::optional<DelayEstimate> delay_estimate =
+      DelayEstimate(DelayEstimate::Quality::kRefined, 10);
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2_refined(
+      num_capture_channels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
+  Block x(kNumBands, num_render_channels);
+  EchoPathVariability echo_path_variability(
+      false, EchoPathVariability::DelayAdjustment::kNone, false);
+  std::vector<std::array<float, kBlockSize>> y(num_capture_channels);
+  std::vector<SubtractorOutput> subtractor_output(num_capture_channels);
+  for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+    subtractor_output[ch].Reset();
+    subtractor_output[ch].s_refined.fill(100.f);
+    subtractor_output[ch].e_refined.fill(100.f);
+    y[ch].fill(1000.f);
+    E2_refined[ch].fill(0.f);
+    Y2[ch].fill(0.f);
+  }
+  Aec3Fft fft;
+  std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
+      converged_filter_frequency_response(
+          num_capture_channels,
+          std::vector<std::array<float, kFftLengthBy2Plus1>>(10));
+  for (auto& v_ch : converged_filter_frequency_response) {
+    for (auto& v : v_ch) {
+      v.fill(0.01f);
+    }
+  }
+  std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
+      diverged_filter_frequency_response = converged_filter_frequency_response;
+  converged_filter_frequency_response[0][2].fill(100.f);
+  converged_filter_frequency_response[0][2][0] = 1.f;
+  std::vector<std::vector<float>> impulse_response(
+      num_capture_channels,
+      std::vector<float>(
+          GetTimeDomainLength(config.filter.refined.length_blocks), 0.f));
+
+  // Verify that linear AEC usability is true when the filter is converged.
+  for (size_t band = 0; band < kNumBands; ++band) {
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      std::fill(x.begin(band, ch), x.end(band, ch), 101.f);
+    }
+  }
+  for (int k = 0; k < 3000; ++k) {
+    render_delay_buffer->Insert(x);
+    for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+      subtractor_output[ch].ComputeMetrics(y[ch]);
+    }
+    state.Update(delay_estimate, converged_filter_frequency_response,
+                 impulse_response, *render_delay_buffer->GetRenderBuffer(),
+                 E2_refined, Y2, subtractor_output);
+  }
+  EXPECT_TRUE(state.UsableLinearEstimate());
+
+  // Verify that linear AEC usability becomes false after an echo path
+  // change is reported.
+  for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+    subtractor_output[ch].ComputeMetrics(y[ch]);
+  }
+  state.HandleEchoPathChange(EchoPathVariability(
+      false, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false));
+  state.Update(delay_estimate, converged_filter_frequency_response,
+               impulse_response, *render_delay_buffer->GetRenderBuffer(),
+               E2_refined, Y2, subtractor_output);
+  EXPECT_FALSE(state.UsableLinearEstimate());
+
+  // Verify that the active render detection works as intended.
+  for (size_t ch = 0; ch < num_render_channels; ++ch) {
+    std::fill(x.begin(0, ch), x.end(0, ch), 101.f);
+  }
+  render_delay_buffer->Insert(x);
+  for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+    subtractor_output[ch].ComputeMetrics(y[ch]);
+  }
+  state.HandleEchoPathChange(EchoPathVariability(
+      true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false));
+  state.Update(delay_estimate, converged_filter_frequency_response,
+               impulse_response, *render_delay_buffer->GetRenderBuffer(),
+               E2_refined, Y2, subtractor_output);
+  EXPECT_FALSE(state.ActiveRender());
+
+  for (int k = 0; k < 1000; ++k) {
+    render_delay_buffer->Insert(x);
+    for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+      subtractor_output[ch].ComputeMetrics(y[ch]);
+    }
+    state.Update(delay_estimate, converged_filter_frequency_response,
+                 impulse_response, *render_delay_buffer->GetRenderBuffer(),
+                 E2_refined, Y2, subtractor_output);
+  }
+  EXPECT_TRUE(state.ActiveRender());
+
+  // Verify that the ERL is properly estimated.
+  for (int band = 0; band < x.NumBands(); ++band) {
+    for (int channel = 0; channel < x.NumChannels(); ++channel) {
+      std::fill(x.begin(band, channel), x.end(band, channel), 0.0f);
+    }
+  }
+
+  for (size_t ch = 0; ch < num_render_channels; ++ch) {
+    x.View(/*band=*/0, ch)[0] = 5000.f;
+  }
+  for (size_t k = 0;
+       k < render_delay_buffer->GetRenderBuffer()->GetFftBuffer().size(); ++k) {
+    render_delay_buffer->Insert(x);
+    if (k == 0) {
+      render_delay_buffer->Reset();
+    }
+    render_delay_buffer->PrepareCaptureProcessing();
+  }
+
+  for (auto& Y2_ch : Y2) {
+    Y2_ch.fill(10.f * 10000.f * 10000.f);
+  }
+  for (size_t k = 0; k < 1000; ++k) {
+    for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+      subtractor_output[ch].ComputeMetrics(y[ch]);
+    }
+    state.Update(delay_estimate, converged_filter_frequency_response,
+                 impulse_response, *render_delay_buffer->GetRenderBuffer(),
+                 E2_refined, Y2, subtractor_output);
+  }
+
+  ASSERT_TRUE(state.UsableLinearEstimate());
+  const std::array<float, kFftLengthBy2Plus1>& erl = state.Erl();
+  EXPECT_EQ(erl[0], erl[1]);
+  for (size_t k = 1; k < erl.size() - 1; ++k) {
+    EXPECT_NEAR(k % 2 == 0 ? 10.f : 1000.f, erl[k], 0.1);
+  }
+  EXPECT_EQ(erl[erl.size() - 2], erl[erl.size() - 1]);
+
+  // Verify that the ERLE is properly estimated.
+  for (auto& E2_refined_ch : E2_refined) {
+    E2_refined_ch.fill(1.f * 10000.f * 10000.f);
+  }
+  for (auto& Y2_ch : Y2) {
+    Y2_ch.fill(10.f * E2_refined[0][0]);
+  }
+  for (size_t k = 0; k < 1000; ++k) {
+    for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+      subtractor_output[ch].ComputeMetrics(y[ch]);
+    }
+    state.Update(delay_estimate, converged_filter_frequency_response,
+                 impulse_response, *render_delay_buffer->GetRenderBuffer(),
+                 E2_refined, Y2, subtractor_output);
+  }
+  ASSERT_TRUE(state.UsableLinearEstimate());
+  {
+    // Note that the render spectrum is built so it does not have energy in
+    // the odd bands but just in the even bands.
+ const auto& erle = state.Erle(/*onset_compensated=*/true)[0]; + EXPECT_EQ(erle[0], erle[1]); + constexpr size_t kLowFrequencyLimit = 32; + for (size_t k = 2; k < kLowFrequencyLimit; k = k + 2) { + EXPECT_NEAR(4.f, erle[k], 0.1); + } + for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; k = k + 2) { + EXPECT_NEAR(1.5f, erle[k], 0.1); + } + EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); + } + for (auto& E2_refined_ch : E2_refined) { + E2_refined_ch.fill(1.f * 10000.f * 10000.f); + } + for (auto& Y2_ch : Y2) { + Y2_ch.fill(5.f * E2_refined[0][0]); + } + for (size_t k = 0; k < 1000; ++k) { + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + subtractor_output[ch].ComputeMetrics(y[ch]); + } + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, *render_delay_buffer->GetRenderBuffer(), + E2_refined, Y2, subtractor_output); + } + + ASSERT_TRUE(state.UsableLinearEstimate()); + { + const auto& erle = state.Erle(/*onset_compensated=*/true)[0]; + EXPECT_EQ(erle[0], erle[1]); + constexpr size_t kLowFrequencyLimit = 32; + for (size_t k = 1; k < kLowFrequencyLimit; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 4.f : 1.f, erle[k], 0.1); + } + for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 1.5f : 1.f, erle[k], 0.1); + } + EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); + } +} + +} // namespace + +class AecStateMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + AecStateMultiChannel, + ::testing::Combine(::testing::Values(1, 2, 8), + ::testing::Values(1, 2, 8))); + +// Verify the general functionality of AecState +TEST_P(AecStateMultiChannel, NormalUsage) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + RunNormalUsageTest(num_render_channels, num_capture_channels); +} + +// Verifies the delay for a converged filter is correctly identified. +TEST(AecState, ConvergedFilterDelay) { + constexpr int kFilterLengthBlocks = 10; + constexpr size_t kNumCaptureChannels = 1; + EchoCanceller3Config config; + AecState state(config, kNumCaptureChannels); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, 48000, 1)); + absl::optional delay_estimate; + std::vector> E2_refined( + kNumCaptureChannels); + std::vector> Y2(kNumCaptureChannels); + std::array x; + EchoPathVariability echo_path_variability( + false, EchoPathVariability::DelayAdjustment::kNone, false); + std::vector subtractor_output(kNumCaptureChannels); + for (auto& output : subtractor_output) { + output.Reset(); + output.s_refined.fill(100.f); + } + std::array y; + x.fill(0.f); + y.fill(0.f); + + std::vector>> + frequency_response(kNumCaptureChannels, + std::vector>( + kFilterLengthBlocks)); + for (auto& v_ch : frequency_response) { + for (auto& v : v_ch) { + v.fill(0.01f); + } + } + + std::vector> impulse_response( + kNumCaptureChannels, + std::vector( + GetTimeDomainLength(config.filter.refined.length_blocks), 0.f)); + + // Verify that the filter delay for a converged filter is properly + // identified. 
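The loop that follows plants a single dominant tap at sample k * kBlockSize + 1 of the impulse response and lets AecState track it. The underlying notion of the filter delay is the block index of that dominant tap; a simplified sketch of the mapping (the real analysis, with smoothing and consistency checks, lives in the filter analyzer):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Returns the delay, in 64-sample blocks, implied by the strongest tap of
    // an estimated impulse response.
    size_t SketchDelayBlocks(const std::vector<float>& impulse_response) {
      const auto peak = std::max_element(
          impulse_response.begin(), impulse_response.end(),
          [](float a, float b) { return std::fabs(a) < std::fabs(b); });
      return static_cast<size_t>(
                 std::distance(impulse_response.begin(), peak)) / 64;
    }

For the test's tap at index k * kBlockSize + 1 this returns k, the delay the test intends to be identified.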
+ for (int k = 0; k < kFilterLengthBlocks; ++k) { + for (auto& ir : impulse_response) { + std::fill(ir.begin(), ir.end(), 0.f); + ir[k * kBlockSize + 1] = 1.f; + } + + state.HandleEchoPathChange(echo_path_variability); + subtractor_output[0].ComputeMetrics(y); + state.Update(delay_estimate, frequency_response, impulse_response, + *render_delay_buffer->GetRenderBuffer(), E2_refined, Y2, + subtractor_output); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc new file mode 100644 index 0000000000..7f076dea8e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.cc @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/alignment_mixer.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +AlignmentMixer::MixingVariant ChooseMixingVariant(bool downmix, + bool adaptive_selection, + int num_channels) { + RTC_DCHECK(!(adaptive_selection && downmix)); + RTC_DCHECK_LT(0, num_channels); + + if (num_channels == 1) { + return AlignmentMixer::MixingVariant::kFixed; + } + if (downmix) { + return AlignmentMixer::MixingVariant::kDownmix; + } + if (adaptive_selection) { + return AlignmentMixer::MixingVariant::kAdaptive; + } + return AlignmentMixer::MixingVariant::kFixed; +} + +} // namespace + +AlignmentMixer::AlignmentMixer( + size_t num_channels, + const EchoCanceller3Config::Delay::AlignmentMixing& config) + : AlignmentMixer(num_channels, + config.downmix, + config.adaptive_selection, + config.activity_power_threshold, + config.prefer_first_two_channels) {} + +AlignmentMixer::AlignmentMixer(size_t num_channels, + bool downmix, + bool adaptive_selection, + float activity_power_threshold, + bool prefer_first_two_channels) + : num_channels_(num_channels), + one_by_num_channels_(1.f / num_channels_), + excitation_energy_threshold_(kBlockSize * activity_power_threshold), + prefer_first_two_channels_(prefer_first_two_channels), + selection_variant_( + ChooseMixingVariant(downmix, adaptive_selection, num_channels_)) { + if (selection_variant_ == MixingVariant::kAdaptive) { + std::fill(strong_block_counters_.begin(), strong_block_counters_.end(), 0); + cumulative_energies_.resize(num_channels_); + std::fill(cumulative_energies_.begin(), cumulative_energies_.end(), 0.f); + } +} + +void AlignmentMixer::ProduceOutput(const Block& x, + rtc::ArrayView y) { + RTC_DCHECK_EQ(x.NumChannels(), num_channels_); + + if (selection_variant_ == MixingVariant::kDownmix) { + Downmix(x, y); + return; + } + + int ch = selection_variant_ == MixingVariant::kFixed ? 
0 : SelectChannel(x); + + RTC_DCHECK_GT(x.NumChannels(), ch); + std::copy(x.begin(/*band=*/0, ch), x.end(/*band=*/0, ch), y.begin()); +} + +void AlignmentMixer::Downmix(const Block& x, + rtc::ArrayView y) const { + RTC_DCHECK_EQ(x.NumChannels(), num_channels_); + RTC_DCHECK_GE(num_channels_, 2); + std::memcpy(&y[0], x.View(/*band=*/0, /*channel=*/0).data(), + kBlockSize * sizeof(y[0])); + for (size_t ch = 1; ch < num_channels_; ++ch) { + const auto x_ch = x.View(/*band=*/0, ch); + for (size_t i = 0; i < kBlockSize; ++i) { + y[i] += x_ch[i]; + } + } + + for (size_t i = 0; i < kBlockSize; ++i) { + y[i] *= one_by_num_channels_; + } +} + +int AlignmentMixer::SelectChannel(const Block& x) { + RTC_DCHECK_EQ(x.NumChannels(), num_channels_); + RTC_DCHECK_GE(num_channels_, 2); + RTC_DCHECK_EQ(cumulative_energies_.size(), num_channels_); + + constexpr size_t kBlocksToChooseLeftOrRight = + static_cast(0.5f * kNumBlocksPerSecond); + const bool good_signal_in_left_or_right = + prefer_first_two_channels_ && + (strong_block_counters_[0] > kBlocksToChooseLeftOrRight || + strong_block_counters_[1] > kBlocksToChooseLeftOrRight); + + const int num_ch_to_analyze = + good_signal_in_left_or_right ? 2 : num_channels_; + + constexpr int kNumBlocksBeforeEnergySmoothing = 60 * kNumBlocksPerSecond; + ++block_counter_; + + for (int ch = 0; ch < num_ch_to_analyze; ++ch) { + float x2_sum = 0.f; + rtc::ArrayView x_ch = x.View(/*band=*/0, ch); + for (size_t i = 0; i < kBlockSize; ++i) { + x2_sum += x_ch[i] * x_ch[i]; + } + + if (ch < 2 && x2_sum > excitation_energy_threshold_) { + ++strong_block_counters_[ch]; + } + + if (block_counter_ <= kNumBlocksBeforeEnergySmoothing) { + cumulative_energies_[ch] += x2_sum; + } else { + constexpr float kSmoothing = 1.f / (10 * kNumBlocksPerSecond); + cumulative_energies_[ch] += + kSmoothing * (x2_sum - cumulative_energies_[ch]); + } + } + + // Normalize the energies to allow the energy computations to from now be + // based on smoothing. + if (block_counter_ == kNumBlocksBeforeEnergySmoothing) { + constexpr float kOneByNumBlocksBeforeEnergySmoothing = + 1.f / kNumBlocksBeforeEnergySmoothing; + for (int ch = 0; ch < num_ch_to_analyze; ++ch) { + cumulative_energies_[ch] *= kOneByNumBlocksBeforeEnergySmoothing; + } + } + + int strongest_ch = 0; + for (int ch = 0; ch < num_ch_to_analyze; ++ch) { + if (cumulative_energies_[ch] > cumulative_energies_[strongest_ch]) { + strongest_ch = ch; + } + } + + if ((good_signal_in_left_or_right && selected_channel_ > 1) || + cumulative_energies_[strongest_ch] > + 2.f * cumulative_energies_[selected_channel_]) { + selected_channel_ = strongest_ch; + } + + return selected_channel_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.h b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.h new file mode 100644 index 0000000000..b3ed04755c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block.h"
+
+namespace webrtc {
+
+// Performs channel conversion to mono for the purpose of providing a decent
+// mono input for the delay estimation. This is achieved by analyzing all
+// incoming channels and producing a single-channel output.
+class AlignmentMixer {
+ public:
+  AlignmentMixer(size_t num_channels,
+                 const EchoCanceller3Config::Delay::AlignmentMixing& config);
+
+  AlignmentMixer(size_t num_channels,
+                 bool downmix,
+                 bool adaptive_selection,
+                 float excitation_limit,
+                 bool prefer_first_two_channels);
+
+  void ProduceOutput(const Block& x, rtc::ArrayView<float, kBlockSize> y);
+
+  enum class MixingVariant { kDownmix, kAdaptive, kFixed };
+
+ private:
+  const size_t num_channels_;
+  const float one_by_num_channels_;
+  const float excitation_energy_threshold_;
+  const bool prefer_first_two_channels_;
+  const MixingVariant selection_variant_;
+  std::array<size_t, 2> strong_block_counters_;
+  std::vector<float> cumulative_energies_;
+  int selected_channel_ = 0;
+  size_t block_counter_ = 0;
+
+  void Downmix(const Block& x, rtc::ArrayView<float, kBlockSize> y) const;
+  int SelectChannel(const Block& x);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer_unittest.cc
new file mode 100644
index 0000000000..eaf6dcb235
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/alignment_mixer_unittest.cc
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/alignment_mixer.h" + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::AllOf; +using ::testing::Each; + +namespace webrtc { +namespace { +std::string ProduceDebugText(bool initial_silence, + bool huge_activity_threshold, + bool prefer_first_two_channels, + int num_channels, + int strongest_ch) { + rtc::StringBuilder ss; + ss << ", Initial silence: " << initial_silence; + ss << ", Huge activity threshold: " << huge_activity_threshold; + ss << ", Prefer first two channels: " << prefer_first_two_channels; + ss << ", Number of channels: " << num_channels; + ss << ", Strongest channel: " << strongest_ch; + return ss.Release(); +} + +} // namespace + +TEST(AlignmentMixer, GeneralAdaptiveMode) { + constexpr int kChannelOffset = 100; + constexpr int kMaxChannelsToTest = 8; + constexpr float kStrongestSignalScaling = + kMaxChannelsToTest * kChannelOffset * 100; + + for (bool initial_silence : {false, true}) { + for (bool huge_activity_threshold : {false, true}) { + for (bool prefer_first_two_channels : {false, true}) { + for (int num_channels = 2; num_channels < 8; ++num_channels) { + for (int strongest_ch = 0; strongest_ch < num_channels; + ++strongest_ch) { + SCOPED_TRACE(ProduceDebugText( + initial_silence, huge_activity_threshold, + prefer_first_two_channels, num_channels, strongest_ch)); + const float excitation_limit = + huge_activity_threshold ? 1000000000.f : 0.001f; + AlignmentMixer am(num_channels, /*downmix*/ false, + /*adaptive_selection*/ true, excitation_limit, + prefer_first_two_channels); + + Block x( + /*num_bands=*/1, num_channels); + if (initial_silence) { + std::array y; + for (int frame = 0; frame < 10 * kNumBlocksPerSecond; ++frame) { + am.ProduceOutput(x, y); + } + } + + for (int frame = 0; frame < 2 * kNumBlocksPerSecond; ++frame) { + const auto channel_value = [&](int frame_index, + int channel_index) { + return static_cast(frame_index + + channel_index * kChannelOffset); + }; + + for (int ch = 0; ch < num_channels; ++ch) { + float scaling = + ch == strongest_ch ? 
kStrongestSignalScaling : 1.f; + auto x_ch = x.View(/*band=*/0, ch); + std::fill(x_ch.begin(), x_ch.end(), + channel_value(frame, ch) * scaling); + } + + std::array y; + y.fill(-1.f); + am.ProduceOutput(x, y); + + if (frame > 1 * kNumBlocksPerSecond) { + if (!prefer_first_two_channels || huge_activity_threshold) { + EXPECT_THAT(y, + AllOf(Each(x.View(/*band=*/0, strongest_ch)[0]))); + } else { + bool left_or_right_chosen; + for (int ch = 0; ch < 2; ++ch) { + left_or_right_chosen = true; + const auto x_ch = x.View(/*band=*/0, ch); + for (size_t k = 0; k < kBlockSize; ++k) { + if (y[k] != x_ch[k]) { + left_or_right_chosen = false; + break; + } + } + if (left_or_right_chosen) { + break; + } + } + EXPECT_TRUE(left_or_right_chosen); + } + } + } + } + } + } + } + } +} + +TEST(AlignmentMixer, DownmixMode) { + for (int num_channels = 1; num_channels < 8; ++num_channels) { + AlignmentMixer am(num_channels, /*downmix*/ true, + /*adaptive_selection*/ false, /*excitation_limit*/ 1.f, + /*prefer_first_two_channels*/ false); + + Block x(/*num_bands=*/1, num_channels); + const auto channel_value = [](int frame_index, int channel_index) { + return static_cast(frame_index + channel_index); + }; + for (int frame = 0; frame < 10; ++frame) { + for (int ch = 0; ch < num_channels; ++ch) { + auto x_ch = x.View(/*band=*/0, ch); + std::fill(x_ch.begin(), x_ch.end(), channel_value(frame, ch)); + } + + std::array y; + y.fill(-1.f); + am.ProduceOutput(x, y); + + float expected_mixed_value = 0.f; + for (int ch = 0; ch < num_channels; ++ch) { + expected_mixed_value += channel_value(frame, ch); + } + expected_mixed_value *= 1.f / num_channels; + + EXPECT_THAT(y, AllOf(Each(expected_mixed_value))); + } + } +} + +TEST(AlignmentMixer, FixedMode) { + for (int num_channels = 1; num_channels < 8; ++num_channels) { + AlignmentMixer am(num_channels, /*downmix*/ false, + /*adaptive_selection*/ false, /*excitation_limit*/ 1.f, + /*prefer_first_two_channels*/ false); + + Block x(/*num_band=*/1, num_channels); + const auto channel_value = [](int frame_index, int channel_index) { + return static_cast(frame_index + channel_index); + }; + for (int frame = 0; frame < 10; ++frame) { + for (int ch = 0; ch < num_channels; ++ch) { + auto x_ch = x.View(/*band=*/0, ch); + std::fill(x_ch.begin(), x_ch.end(), channel_value(frame, ch)); + } + + std::array y; + y.fill(-1.f); + am.ProduceOutput(x, y); + EXPECT_THAT(y, AllOf(Each(x.View(/*band=*/0, /*channel=*/0)[0]))); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +TEST(AlignmentMixerDeathTest, ZeroNumChannels) { + EXPECT_DEATH( + AlignmentMixer(/*num_channels*/ 0, /*downmix*/ false, + /*adaptive_selection*/ false, /*excitation_limit*/ 1.f, + /*prefer_first_two_channels*/ false); + , ""); +} + +TEST(AlignmentMixerDeathTest, IncorrectVariant) { + EXPECT_DEATH( + AlignmentMixer(/*num_channels*/ 1, /*downmix*/ true, + /*adaptive_selection*/ true, /*excitation_limit*/ 1.f, + /*prefer_first_two_channels*/ false); + , ""); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc new file mode 100644 index 0000000000..45f56a5dce --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/api_call_jitter_metrics.h" + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { + +bool TimeToReportMetrics(int frames_since_last_report) { + constexpr int kNumFramesPerSecond = 100; + constexpr int kReportingIntervalFrames = 10 * kNumFramesPerSecond; + return frames_since_last_report == kReportingIntervalFrames; +} + +} // namespace + +ApiCallJitterMetrics::Jitter::Jitter() + : max_(0), min_(std::numeric_limits::max()) {} + +void ApiCallJitterMetrics::Jitter::Update(int num_api_calls_in_a_row) { + min_ = std::min(min_, num_api_calls_in_a_row); + max_ = std::max(max_, num_api_calls_in_a_row); +} + +void ApiCallJitterMetrics::Jitter::Reset() { + min_ = std::numeric_limits::max(); + max_ = 0; +} + +void ApiCallJitterMetrics::Reset() { + render_jitter_.Reset(); + capture_jitter_.Reset(); + num_api_calls_in_a_row_ = 0; + frames_since_last_report_ = 0; + last_call_was_render_ = false; + proper_call_observed_ = false; +} + +void ApiCallJitterMetrics::ReportRenderCall() { + if (!last_call_was_render_) { + // If the previous call was a capture and a proper call has been observed + // (containing both render and capture data), storing the last number of + // capture calls into the metrics. + if (proper_call_observed_) { + capture_jitter_.Update(num_api_calls_in_a_row_); + } + + // Reset the call counter to start counting render calls. + num_api_calls_in_a_row_ = 0; + } + ++num_api_calls_in_a_row_; + last_call_was_render_ = true; +} + +void ApiCallJitterMetrics::ReportCaptureCall() { + if (last_call_was_render_) { + // If the previous call was a render and a proper call has been observed + // (containing both render and capture data), storing the last number of + // render calls into the metrics. + if (proper_call_observed_) { + render_jitter_.Update(num_api_calls_in_a_row_); + } + // Reset the call counter to start counting capture calls. + num_api_calls_in_a_row_ = 0; + + // If this statement is reached, at least one render and one capture call + // have been observed. + proper_call_observed_ = true; + } + ++num_api_calls_in_a_row_; + last_call_was_render_ = false; + + // Only report and update jitter metrics for when a proper call, containing + // both render and capture data, has been observed. + if (proper_call_observed_ && + TimeToReportMetrics(++frames_since_last_report_)) { + // Report jitter, where the base basic unit is frames. + constexpr int kMaxJitterToReport = 50; + + // Report max and min jitter for render and capture, in units of 20 ms. 
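Given the counting above, the reported values have a direct interpretation: the shortest and longest uninterrupted run of same-type API calls seen since the last report. A usage sketch driving the class the same way the unit tests further below do, with a steady 2:2 call pattern:

    #include "modules/audio_processing/aec3/api_call_jitter_metrics.h"

    void SketchSteadyJitter() {
      webrtc::ApiCallJitterMetrics metrics;
      for (int k = 0; k < 10 * 100; ++k) {  // Roughly 10 s at 100 frames/s.
        metrics.ReportRenderCall();
        metrics.ReportRenderCall();
        metrics.ReportCaptureCall();
        if (metrics.WillReportMetricsAtNextCapture()) {
          // Right before the periodic report, every observed run has had
          // length 2, so render_jitter() and capture_jitter() both have
          // min() == max() == 2.
        }
        metrics.ReportCaptureCall();
      }
    }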
+ RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.MaxRenderJitter", + std::min(kMaxJitterToReport, render_jitter().max()), 1, + kMaxJitterToReport, kMaxJitterToReport); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.MinRenderJitter", + std::min(kMaxJitterToReport, render_jitter().min()), 1, + kMaxJitterToReport, kMaxJitterToReport); + + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.MaxCaptureJitter", + std::min(kMaxJitterToReport, capture_jitter().max()), 1, + kMaxJitterToReport, kMaxJitterToReport); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.MinCaptureJitter", + std::min(kMaxJitterToReport, capture_jitter().min()), 1, + kMaxJitterToReport, kMaxJitterToReport); + + frames_since_last_report_ = 0; + Reset(); + } +} + +bool ApiCallJitterMetrics::WillReportMetricsAtNextCapture() const { + return TimeToReportMetrics(frames_since_last_report_ + 1); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.h b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.h new file mode 100644 index 0000000000..dd1fa82e93 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_ + +namespace webrtc { + +// Stores data for reporting metrics on the API call jitter. +class ApiCallJitterMetrics { + public: + class Jitter { + public: + Jitter(); + void Update(int num_api_calls_in_a_row); + void Reset(); + + int min() const { return min_; } + int max() const { return max_; } + + private: + int max_; + int min_; + }; + + ApiCallJitterMetrics() { Reset(); } + + // Update metrics for render API call. + void ReportRenderCall(); + + // Update and periodically report metrics for capture API call. + void ReportCaptureCall(); + + // Methods used only for testing. + const Jitter& render_jitter() const { return render_jitter_; } + const Jitter& capture_jitter() const { return capture_jitter_; } + bool WillReportMetricsAtNextCapture() const; + + private: + void Reset(); + + Jitter render_jitter_; + Jitter capture_jitter_; + + int num_api_calls_in_a_row_ = 0; + int frames_since_last_report_ = 0; + bool last_call_was_render_ = false; + bool proper_call_observed_ = false; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics_unittest.cc new file mode 100644 index 0000000000..b902487152 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/api_call_jitter_metrics_unittest.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/api_call_jitter_metrics.h" + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "test/gtest.h" + +namespace webrtc { + +// Verify constant jitter. +TEST(ApiCallJitterMetrics, ConstantJitter) { + for (int jitter = 1; jitter < 20; ++jitter) { + ApiCallJitterMetrics metrics; + for (size_t k = 0; k < 30 * kNumBlocksPerSecond; ++k) { + for (int j = 0; j < jitter; ++j) { + metrics.ReportRenderCall(); + } + + for (int j = 0; j < jitter; ++j) { + metrics.ReportCaptureCall(); + + if (metrics.WillReportMetricsAtNextCapture()) { + EXPECT_EQ(jitter, metrics.render_jitter().min()); + EXPECT_EQ(jitter, metrics.render_jitter().max()); + EXPECT_EQ(jitter, metrics.capture_jitter().min()); + EXPECT_EQ(jitter, metrics.capture_jitter().max()); + } + } + } + } +} + +// Verify peaky jitter for the render. +TEST(ApiCallJitterMetrics, JitterPeakRender) { + constexpr int kMinJitter = 2; + constexpr int kJitterPeak = 10; + constexpr int kPeakInterval = 100; + + ApiCallJitterMetrics metrics; + int render_surplus = 0; + + for (size_t k = 0; k < 30 * kNumBlocksPerSecond; ++k) { + const int num_render_calls = + k % kPeakInterval == 0 ? kJitterPeak : kMinJitter; + for (int j = 0; j < num_render_calls; ++j) { + metrics.ReportRenderCall(); + ++render_surplus; + } + + ASSERT_LE(kMinJitter, render_surplus); + const int num_capture_calls = + render_surplus == kMinJitter ? kMinJitter : kMinJitter + 1; + for (int j = 0; j < num_capture_calls; ++j) { + metrics.ReportCaptureCall(); + + if (metrics.WillReportMetricsAtNextCapture()) { + EXPECT_EQ(kMinJitter, metrics.render_jitter().min()); + EXPECT_EQ(kJitterPeak, metrics.render_jitter().max()); + EXPECT_EQ(kMinJitter, metrics.capture_jitter().min()); + EXPECT_EQ(kMinJitter + 1, metrics.capture_jitter().max()); + } + --render_surplus; + } + } +} + +// Verify peaky jitter for the capture. +TEST(ApiCallJitterMetrics, JitterPeakCapture) { + constexpr int kMinJitter = 2; + constexpr int kJitterPeak = 10; + constexpr int kPeakInterval = 100; + + ApiCallJitterMetrics metrics; + int capture_surplus = kMinJitter; + + for (size_t k = 0; k < 30 * kNumBlocksPerSecond; ++k) { + ASSERT_LE(kMinJitter, capture_surplus); + const int num_render_calls = + capture_surplus == kMinJitter ? kMinJitter : kMinJitter + 1; + for (int j = 0; j < num_render_calls; ++j) { + metrics.ReportRenderCall(); + --capture_surplus; + } + + const int num_capture_calls = + k % kPeakInterval == 0 ? kJitterPeak : kMinJitter; + for (int j = 0; j < num_capture_calls; ++j) { + metrics.ReportCaptureCall(); + + if (metrics.WillReportMetricsAtNextCapture()) { + EXPECT_EQ(kMinJitter, metrics.render_jitter().min()); + EXPECT_EQ(kMinJitter + 1, metrics.render_jitter().max()); + EXPECT_EQ(kMinJitter, metrics.capture_jitter().min()); + EXPECT_EQ(kJitterPeak, metrics.capture_jitter().max()); + } + ++capture_surplus; + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block.h b/third_party/libwebrtc/modules/audio_processing/aec3/block.h new file mode 100644 index 0000000000..c1fc70722d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Contains one or more channels of 4 milliseconds of audio data. +// The audio is split in one or more frequency bands, each with a sampling +// rate of 16 kHz. +class Block { + public: + Block(int num_bands, int num_channels, float default_value = 0.0f) + : num_bands_(num_bands), + num_channels_(num_channels), + data_(num_bands * num_channels * kBlockSize, default_value) {} + + // Returns the number of bands. + int NumBands() const { return num_bands_; } + + // Returns the number of channels. + int NumChannels() const { return num_channels_; } + + // Modifies the number of channels and sets all samples to zero. + void SetNumChannels(int num_channels) { + num_channels_ = num_channels; + data_.resize(num_bands_ * num_channels_ * kBlockSize); + std::fill(data_.begin(), data_.end(), 0.0f); + } + + // Iterators for accessing the data. + auto begin(int band, int channel) { + return data_.begin() + GetIndex(band, channel); + } + + auto begin(int band, int channel) const { + return data_.begin() + GetIndex(band, channel); + } + + auto end(int band, int channel) { return begin(band, channel) + kBlockSize; } + + auto end(int band, int channel) const { + return begin(band, channel) + kBlockSize; + } + + // Access data via ArrayView. + rtc::ArrayView View(int band, int channel) { + return rtc::ArrayView(&data_[GetIndex(band, channel)], + kBlockSize); + } + + rtc::ArrayView View(int band, int channel) const { + return rtc::ArrayView( + &data_[GetIndex(band, channel)], kBlockSize); + } + + // Lets two Blocks swap audio data. + void Swap(Block& b) { + std::swap(num_bands_, b.num_bands_); + std::swap(num_channels_, b.num_channels_); + data_.swap(b.data_); + } + + private: + // Returns the index of the first sample of the requested |band| and + // |channel|. + int GetIndex(int band, int channel) const { + return (band * num_channels_ + channel) * kBlockSize; + } + + int num_bands_; + int num_channels_; + std::vector data_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc new file mode 100644 index 0000000000..289c3f0d10 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.cc @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/block_buffer.h" + +#include + +namespace webrtc { + +BlockBuffer::BlockBuffer(size_t size, size_t num_bands, size_t num_channels) + : size(static_cast(size)), + buffer(size, Block(num_bands, num_channels)) {} + +BlockBuffer::~BlockBuffer() = default; + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.h new file mode 100644 index 0000000000..3489d51646 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_buffer.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_ + +#include + +#include + +#include "modules/audio_processing/aec3/block.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// Struct for bundling a circular buffer of two dimensional vector objects +// together with the read and write indices. +struct BlockBuffer { + BlockBuffer(size_t size, size_t num_bands, size_t num_channels); + ~BlockBuffer(); + + int IncIndex(int index) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return index < size - 1 ? index + 1 : 0; + } + + int DecIndex(int index) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return index > 0 ? index - 1 : size - 1; + } + + int OffsetIndex(int index, int offset) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + RTC_DCHECK_GE(size, offset); + return (size + index + offset) % size; + } + + void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); } + void IncWriteIndex() { write = IncIndex(write); } + void DecWriteIndex() { write = DecIndex(write); } + void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); } + void IncReadIndex() { read = IncIndex(read); } + void DecReadIndex() { read = DecIndex(read); } + + const int size; + std::vector buffer; + int write = 0; + int read = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc new file mode 100644 index 0000000000..059bbafcdb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/aec3/block_delay_buffer.h" + +#include "api/array_view.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +BlockDelayBuffer::BlockDelayBuffer(size_t num_channels, + size_t num_bands, + size_t frame_length, + size_t delay_samples) + : frame_length_(frame_length), + delay_(delay_samples), + buf_(num_channels, + std::vector>(num_bands, + std::vector(delay_, 0.f))) {} + +BlockDelayBuffer::~BlockDelayBuffer() = default; + +void BlockDelayBuffer::DelaySignal(AudioBuffer* frame) { + RTC_DCHECK_EQ(buf_.size(), frame->num_channels()); + if (delay_ == 0) { + return; + } + + const size_t num_bands = buf_[0].size(); + const size_t num_channels = buf_.size(); + + const size_t i_start = last_insert_; + size_t i = 0; + for (size_t ch = 0; ch < num_channels; ++ch) { + RTC_DCHECK_EQ(buf_[ch].size(), frame->num_bands()); + RTC_DCHECK_EQ(buf_[ch].size(), num_bands); + rtc::ArrayView frame_ch(frame->split_bands(ch), num_bands); + const size_t delay = delay_; + + for (size_t band = 0; band < num_bands; ++band) { + RTC_DCHECK_EQ(delay_, buf_[ch][band].size()); + i = i_start; + + // Offloading these pointers and class variables to local variables allows + // the compiler to optimize the below loop when compiling with + // '-fno-strict-aliasing'. + float* buf_ch_band = buf_[ch][band].data(); + float* frame_ch_band = frame_ch[band]; + + for (size_t k = 0, frame_length = frame_length_; k < frame_length; ++k) { + const float tmp = buf_ch_band[i]; + buf_ch_band[i] = frame_ch_band[k]; + frame_ch_band[k] = tmp; + + i = i < delay - 1 ? i + 1 : 0; + } + } + } + + last_insert_ = i; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.h new file mode 100644 index 0000000000..711a790bfe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_ + +#include + +#include + +#include "modules/audio_processing/audio_buffer.h" + +namespace webrtc { + +// Class for applying a fixed delay to the samples in a signal partitioned using +// the audiobuffer band-splitting scheme. +class BlockDelayBuffer { + public: + BlockDelayBuffer(size_t num_channels, + size_t num_bands, + size_t frame_length, + size_t delay_samples); + ~BlockDelayBuffer(); + + // Delays the samples by the specified delay. 
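DelaySignal() above needs no separate read and write buffers: each incoming sample is swapped with the sample stored delay_ positions earlier in the circular buffer. A single-channel, single-band reduction of that inner loop (names are illustrative):

    #include <cstddef>
    #include <utility>
    #include <vector>

    // The delay line holds the last `delay` inputs; swapping emits the sample
    // from `delay` samples ago while storing the new one in its place.
    void DelayInPlace(std::vector<float>& frame,
                      std::vector<float>& delay_line,  // size() == delay
                      size_t& pos) {
      const size_t delay = delay_line.size();
      if (delay == 0) return;
      for (float& sample : frame) {
        std::swap(sample, delay_line[pos]);
        pos = pos + 1 < delay ? pos + 1 : 0;
      }
    }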
+  void DelaySignal(AudioBuffer* frame);
+
+ private:
+  const size_t frame_length_;
+  const size_t delay_;
+  std::vector<std::vector<std::vector<float>>> buf_;
+  size_t last_insert_ = 0;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer_unittest.cc
new file mode 100644
index 0000000000..011ab49651
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_delay_buffer_unittest.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/block_delay_buffer.h"
+
+#include <string>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+float SampleValue(size_t sample_index) {
+  return sample_index % 32768;
+}
+
+// Populates the frame with linearly increasing sample values for each band.
+void PopulateInputFrame(size_t frame_length,
+                        size_t num_bands,
+                        size_t first_sample_index,
+                        float* const* frame) {
+  for (size_t k = 0; k < num_bands; ++k) {
+    for (size_t i = 0; i < frame_length; ++i) {
+      frame[k][i] = SampleValue(first_sample_index + i);
+    }
+  }
+}
+
+std::string ProduceDebugText(int sample_rate_hz, size_t delay) {
+  char log_stream_buffer[8 * 1024];
+  rtc::SimpleStringBuilder ss(log_stream_buffer);
+  ss << "Sample rate: " << sample_rate_hz;
+  ss << ", Delay: " << delay;
+  return ss.str();
+}
+
+}  // namespace
+
+class BlockDelayBufferTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<size_t, int, size_t>> {};
+
+INSTANTIATE_TEST_SUITE_P(
+    ParameterCombinations,
+    BlockDelayBufferTest,
+    ::testing::Combine(::testing::Values(0, 1, 27, 160, 4321, 7021),
+                       ::testing::Values(16000, 32000, 48000),
+                       ::testing::Values(1, 2, 4)));
+
+// Verifies that the correct signal delay is achieved.
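Concretely, a hand-worked instance of the behavior this suite checks, reusing the DelayInPlace() sketch from the block_delay_buffer.cc notes above (illustrative code, not part of the test suite):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    void SketchDelayByThree() {
      std::vector<float> frame = {1.f, 2.f, 3.f, 4.f, 5.f};
      std::vector<float> delay_line(3, 0.f);  // Delay of three samples.
      size_t pos = 0;
      DelayInPlace(frame, delay_line, pos);
      // Three initial zeros from the delay line, then the shifted input.
      assert(frame[0] == 0.f && frame[1] == 0.f && frame[2] == 0.f);
      assert(frame[3] == 1.f && frame[4] == 2.f);
    }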
+TEST_P(BlockDelayBufferTest, CorrectDelayApplied) { + const size_t delay = std::get<0>(GetParam()); + const int rate = std::get<1>(GetParam()); + const size_t num_channels = std::get<2>(GetParam()); + + SCOPED_TRACE(ProduceDebugText(rate, delay)); + size_t num_bands = NumBandsForRate(rate); + size_t subband_frame_length = 160; + + BlockDelayBuffer delay_buffer(num_channels, num_bands, subband_frame_length, + delay); + + static constexpr size_t kNumFramesToProcess = 20; + for (size_t frame_index = 0; frame_index < kNumFramesToProcess; + ++frame_index) { + AudioBuffer audio_buffer(rate, num_channels, rate, num_channels, rate, + num_channels); + if (rate > 16000) { + audio_buffer.SplitIntoFrequencyBands(); + } + size_t first_sample_index = frame_index * subband_frame_length; + for (size_t ch = 0; ch < num_channels; ++ch) { + PopulateInputFrame(subband_frame_length, num_bands, first_sample_index, + &audio_buffer.split_bands(ch)[0]); + } + delay_buffer.DelaySignal(&audio_buffer); + + for (size_t ch = 0; ch < num_channels; ++ch) { + for (size_t band = 0; band < num_bands; ++band) { + size_t sample_index = first_sample_index; + for (size_t i = 0; i < subband_frame_length; ++i, ++sample_index) { + if (sample_index < delay) { + EXPECT_EQ(0.f, audio_buffer.split_bands(ch)[band][i]); + } else { + EXPECT_EQ(SampleValue(sample_index - delay), + audio_buffer.split_bands(ch)[band][i]); + } + } + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc new file mode 100644 index 0000000000..4243ddeba0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.cc @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_framer.h" + +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +BlockFramer::BlockFramer(size_t num_bands, size_t num_channels) + : num_bands_(num_bands), + num_channels_(num_channels), + buffer_(num_bands_, + std::vector>( + num_channels, + std::vector(kBlockSize, 0.f))) { + RTC_DCHECK_LT(0, num_bands); + RTC_DCHECK_LT(0, num_channels); +} + +BlockFramer::~BlockFramer() = default; + +// All the constants are chosen so that the buffer is either empty or has enough +// samples for InsertBlockAndExtractSubFrame to produce a frame. In order to +// achieve this, the InsertBlockAndExtractSubFrame and InsertBlock methods need +// to be called in the correct order. 
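The invariant described above follows from simple sample accounting: subframes are 80 samples while blocks are 64, so four subframes consume exactly five blocks; InsertBlockAndExtractSubFrame() supplies one block per call, and the fifth block must arrive through InsertBlock() after every fourth subframe. A single-band, single-channel sketch of the rebuffering implemented below (illustrative class; the real code also validates sizes and call order with DCHECKs):

    #include <algorithm>
    #include <array>
    #include <vector>

    static_assert(4 * 80 == 5 * 64, "four subframes == five blocks");

    class Rebuffer64To80 {
     public:
      // Primed with one block of zeros, like BlockFramer's buffer_.
      Rebuffer64To80() : buffered_(64, 0.f) {}

      // Mirrors InsertBlock(): store a block without emitting anything.
      void Push(const std::array<float, 64>& block) {
        buffered_.insert(buffered_.end(), block.begin(), block.end());
      }

      // Mirrors InsertBlockAndExtractSubFrame(): store a block, then emit an
      // 80-sample subframe from the front of the buffer. Assumes the caller
      // follows the four-extractions-then-one-insert cadence.
      std::array<float, 80> PushAndPop(const std::array<float, 64>& block) {
        Push(block);
        std::array<float, 80> subframe;
        std::copy(buffered_.begin(), buffered_.begin() + 80, subframe.begin());
        buffered_.erase(buffered_.begin(), buffered_.begin() + 80);
        return subframe;
      }

     private:
      std::vector<float> buffered_;  // Leftover samples between calls.
    };

Following that cadence, the leftover count cycles through 48, 32, 16, and 0 samples, and is refilled to 64 by the extra Push().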
+void BlockFramer::InsertBlock(const Block& block) { + RTC_DCHECK_EQ(num_bands_, block.NumBands()); + RTC_DCHECK_EQ(num_channels_, block.NumChannels()); + for (size_t band = 0; band < num_bands_; ++band) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + RTC_DCHECK_EQ(0, buffer_[band][channel].size()); + + buffer_[band][channel].insert(buffer_[band][channel].begin(), + block.begin(band, channel), + block.end(band, channel)); + } + } +} + +void BlockFramer::InsertBlockAndExtractSubFrame( + const Block& block, + std::vector>>* sub_frame) { + RTC_DCHECK(sub_frame); + RTC_DCHECK_EQ(num_bands_, block.NumBands()); + RTC_DCHECK_EQ(num_channels_, block.NumChannels()); + RTC_DCHECK_EQ(num_bands_, sub_frame->size()); + for (size_t band = 0; band < num_bands_; ++band) { + RTC_DCHECK_EQ(num_channels_, (*sub_frame)[0].size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + RTC_DCHECK_LE(kSubFrameLength, + buffer_[band][channel].size() + kBlockSize); + RTC_DCHECK_GE(kBlockSize, buffer_[band][channel].size()); + RTC_DCHECK_EQ(kSubFrameLength, (*sub_frame)[band][channel].size()); + + const int samples_to_frame = + kSubFrameLength - buffer_[band][channel].size(); + std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(), + (*sub_frame)[band][channel].begin()); + std::copy( + block.begin(band, channel), + block.begin(band, channel) + samples_to_frame, + (*sub_frame)[band][channel].begin() + buffer_[band][channel].size()); + buffer_[band][channel].clear(); + buffer_[band][channel].insert( + buffer_[band][channel].begin(), + block.begin(band, channel) + samples_to_frame, + block.end(band, channel)); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.h new file mode 100644 index 0000000000..e2cdd5a17c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block.h" + +namespace webrtc { + +// Class for producing frames consisting of 2 subframes of 80 samples each +// from 64 sample blocks. The class is designed to work together with the +// FrameBlocker class which performs the reverse conversion. Used together with +// that, this class produces output frames are the same rate as frames are +// received by the FrameBlocker class. Note that the internal buffers will +// overrun if any other rate of packets insertion is used. +class BlockFramer { + public: + BlockFramer(size_t num_bands, size_t num_channels); + ~BlockFramer(); + BlockFramer(const BlockFramer&) = delete; + BlockFramer& operator=(const BlockFramer&) = delete; + + // Adds a 64 sample block into the data that will form the next output frame. + void InsertBlock(const Block& block); + // Adds a 64 sample block and extracts an 80 sample subframe. 
+ void InsertBlockAndExtractSubFrame( + const Block& block, + std::vector>>* sub_frame); + + private: + const size_t num_bands_; + const size_t num_channels_; + std::vector>> buffer_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_framer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer_unittest.cc new file mode 100644 index 0000000000..9439623f72 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_framer_unittest.cc @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_framer.h" + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +void SetupSubFrameView( + std::vector>>* sub_frame, + std::vector>>* sub_frame_view) { + for (size_t band = 0; band < sub_frame_view->size(); ++band) { + for (size_t channel = 0; channel < (*sub_frame_view)[band].size(); + ++channel) { + (*sub_frame_view)[band][channel] = + rtc::ArrayView((*sub_frame)[band][channel].data(), + (*sub_frame)[band][channel].size()); + } + } +} + +float ComputeSampleValue(size_t chunk_counter, + size_t chunk_size, + size_t band, + size_t channel, + size_t sample_index, + int offset) { + float value = static_cast(100 + chunk_counter * chunk_size + + sample_index + channel) + + offset; + return 5000 * band + value; +} + +bool VerifySubFrame( + size_t sub_frame_counter, + int offset, + const std::vector>>& sub_frame_view) { + for (size_t band = 0; band < sub_frame_view.size(); ++band) { + for (size_t channel = 0; channel < sub_frame_view[band].size(); ++channel) { + for (size_t sample = 0; sample < sub_frame_view[band][channel].size(); + ++sample) { + const float reference_value = ComputeSampleValue( + sub_frame_counter, kSubFrameLength, band, channel, sample, offset); + if (reference_value != sub_frame_view[band][channel][sample]) { + return false; + } + } + } + } + return true; +} + +void FillBlock(size_t block_counter, Block* block) { + for (int band = 0; band < block->NumBands(); ++band) { + for (int channel = 0; channel < block->NumChannels(); ++channel) { + auto b = block->View(band, channel); + for (size_t sample = 0; sample < kBlockSize; ++sample) { + b[sample] = ComputeSampleValue(block_counter, kBlockSize, band, channel, + sample, 0); + } + } + } +} + +// Verifies that the BlockFramer is able to produce the expected frame content. 
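ComputeSampleValue() above encodes the chunk counter, band, channel, and sample position into every value, so VerifySubFrame() detects any sample that is misrouted or shifted in time. A few spot checks of the encoding (a hypothetical helper calling the function defined above):

    #include <cassert>

    void SketchSampleEncoding() {
      assert(ComputeSampleValue(0, 80, 0, 0, 0, 0) == 100.f);   // Base value.
      assert(ComputeSampleValue(0, 80, 1, 0, 0, 0) == 5100.f);  // Next band: +5000.
      assert(ComputeSampleValue(1, 80, 0, 0, 0, 0) == 180.f);   // Next chunk: +80.
    }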
+void RunFramerTest(int sample_rate_hz, size_t num_channels) { + constexpr size_t kNumSubFramesToProcess = 10; + const size_t num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_bands, num_channels); + std::vector>> output_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame_view( + num_bands, std::vector>(num_channels)); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(num_bands, num_channels); + + size_t block_index = 0; + for (size_t sub_frame_index = 0; sub_frame_index < kNumSubFramesToProcess; + ++sub_frame_index) { + FillBlock(block_index++, &block); + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view); + if (sub_frame_index > 1) { + EXPECT_TRUE(VerifySubFrame(sub_frame_index, -64, output_sub_frame_view)); + } + + if ((sub_frame_index + 1) % 4 == 0) { + FillBlock(block_index++, &block); + framer.InsertBlock(block); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the BlockFramer crashes if the InsertBlockAndExtractSubFrame +// method is called for inputs with the wrong number of bands or band lengths. +void RunWronglySizedInsertAndExtractParametersTest( + int sample_rate_hz, + size_t correct_num_channels, + size_t num_block_bands, + size_t num_block_channels, + size_t num_sub_frame_bands, + size_t num_sub_frame_channels, + size_t sub_frame_length) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_block_bands, num_block_channels); + std::vector>> output_sub_frame( + num_sub_frame_bands, + std::vector>( + num_sub_frame_channels, std::vector(sub_frame_length, 0.f))); + std::vector>> output_sub_frame_view( + output_sub_frame.size(), + std::vector>(num_sub_frame_channels)); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(correct_num_bands, correct_num_channels); + EXPECT_DEATH( + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view), ""); +} + +// Verifies that the BlockFramer crashes if the InsertBlock method is called for +// inputs with the wrong number of bands or band lengths. +void RunWronglySizedInsertParameterTest(int sample_rate_hz, + size_t correct_num_channels, + size_t num_block_bands, + size_t num_block_channels) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block correct_block(correct_num_bands, correct_num_channels); + Block wrong_block(num_block_bands, num_block_channels); + std::vector>> output_sub_frame( + correct_num_bands, + std::vector>( + correct_num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame_view( + output_sub_frame.size(), + std::vector>(correct_num_channels)); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(correct_num_bands, correct_num_channels); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + + EXPECT_DEATH(framer.InsertBlock(wrong_block), ""); +} + +// Verifies that the BlockFramer crashes if the InsertBlock method is called +// after a wrong number of previous InsertBlockAndExtractSubFrame method calls +// have been made. 
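Why every count from zero to three preceding extractions must crash below: the framer starts with one block of zeros buffered, each InsertBlockAndExtractSubFrame() call adds 64 samples and removes 80, and InsertBlock() DCHECKs that the per-band buffers are empty, which only holds after every fourth extraction. In numbers:

    #include <cstddef>

    // Buffered samples after n InsertBlockAndExtractSubFrame() calls:
    // 64 -> 48 -> 32 -> 16 -> 0, at which point InsertBlock() is legal.
    constexpr size_t BufferedSamplesAfter(size_t n_calls) {
      return 64 + n_calls * 64 - n_calls * 80;
    }
    static_assert(BufferedSamplesAfter(4) == 0, "empty after the 4th call");
    static_assert(BufferedSamplesAfter(3) == 16, "non-empty before that");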
+ +void RunWronglyInsertOrderTest(int sample_rate_hz, + size_t num_channels, + size_t num_preceeding_api_calls) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block block(correct_num_bands, num_channels); + std::vector>> output_sub_frame( + correct_num_bands, + std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame_view( + output_sub_frame.size(), + std::vector>(num_channels)); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(correct_num_bands, num_channels); + for (size_t k = 0; k < num_preceeding_api_calls; ++k) { + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view); + } + + EXPECT_DEATH(framer.InsertBlock(block), ""); +} +#endif + +std::string ProduceDebugText(int sample_rate_hz, size_t num_channels) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + ss << ", number of channels: " << num_channels; + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST(BlockFramerDeathTest, + WrongNumberOfBandsInBlockForInsertBlockAndExtractSubFrame) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, wrong_num_bands, correct_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(BlockFramerDeathTest, + WrongNumberOfChannelsInBlockForInsertBlockAndExtractSubFrame) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, wrong_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(BlockFramerDeathTest, + WrongNumberOfBandsInSubFrameForInsertBlockAndExtractSubFrame) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + wrong_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(BlockFramerDeathTest, + WrongNumberOfChannelsInSubFrameForInsertBlockAndExtractSubFrame) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + correct_num_bands, wrong_num_channels, kSubFrameLength); + } + } +} + +TEST(BlockFramerDeathTest, + WrongNumberOfSamplesInSubFrameForInsertBlockAndExtractSubFrame) { + const size_t correct_num_channels = 1; + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate, 
correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength - 1); + } +} + +TEST(BlockFramerDeathTest, WrongNumberOfBandsInBlockForInsertBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertParameterTest(rate, correct_num_channels, + wrong_num_bands, correct_num_channels); + } + } +} + +TEST(BlockFramerDeathTest, WrongNumberOfChannelsInBlockForInsertBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (auto correct_num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertParameterTest(rate, correct_num_channels, + correct_num_bands, wrong_num_channels); + } + } +} + +TEST(BlockFramerDeathTest, WrongNumberOfPreceedingApiCallsForInsertBlock) { + for (size_t num_channels : {1, 2, 8}) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_calls = 0; num_calls < 4; ++num_calls) { + rtc::StringBuilder ss; + ss << "Sample rate: " << rate; + ss << ", Num channels: " << num_channels; + ss << ", Num preceeding InsertBlockAndExtractSubFrame calls: " + << num_calls; + + SCOPED_TRACE(ss.str()); + RunWronglyInsertOrderTest(rate, num_channels, num_calls); + } + } + } +} + +// Verifies that the verification for 0 number of channels works. +TEST(BlockFramerDeathTest, ZeroNumberOfChannelsParameter) { + EXPECT_DEATH(BlockFramer(16000, 0), ""); +} + +// Verifies that the verification for 0 number of bands works. +TEST(BlockFramerDeathTest, ZeroNumberOfBandsParameter) { + EXPECT_DEATH(BlockFramer(0, 1), ""); +} + +// Verifies that the verification for null sub_frame pointer works. +TEST(BlockFramerDeathTest, NullSubFrameParameter) { + EXPECT_DEATH( + BlockFramer(1, 1).InsertBlockAndExtractSubFrame(Block(1, 1), nullptr), + ""); +} + +#endif + +TEST(BlockFramer, FrameBitexactness) { + for (auto rate : {16000, 32000, 48000}) { + for (auto num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, num_channels)); + RunFramerTest(rate, num_channels); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc new file mode 100644 index 0000000000..63e3d9cc7c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.cc @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+#include "modules/audio_processing/aec3/block_processor.h"
+
+#include <stddef.h>
+
+#include <atomic>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "api/audio/echo_control.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block_processor_metrics.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/echo_remover.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/aec3/render_delay_controller.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace {
+
+enum class BlockProcessorApiCall { kCapture, kRender };
+
+class BlockProcessorImpl final : public BlockProcessor {
+ public:
+  BlockProcessorImpl(const EchoCanceller3Config& config,
+                     int sample_rate_hz,
+                     size_t num_render_channels,
+                     size_t num_capture_channels,
+                     std::unique_ptr<RenderDelayBuffer> render_buffer,
+                     std::unique_ptr<RenderDelayController> delay_controller,
+                     std::unique_ptr<EchoRemover> echo_remover);
+
+  BlockProcessorImpl() = delete;
+
+  ~BlockProcessorImpl() override;
+
+  void ProcessCapture(bool echo_path_gain_change,
+                      bool capture_signal_saturation,
+                      Block* linear_output,
+                      Block* capture_block) override;
+
+  void BufferRender(const Block& block) override;
+
+  void UpdateEchoLeakageStatus(bool leakage_detected) override;
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override;
+
+  void SetAudioBufferDelay(int delay_ms) override;
+  void SetCaptureOutputUsage(bool capture_output_used) override;
+
+ private:
+  static std::atomic<int> instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const EchoCanceller3Config config_;
+  bool capture_properly_started_ = false;
+  bool render_properly_started_ = false;
+  const size_t sample_rate_hz_;
+  std::unique_ptr<RenderDelayBuffer> render_buffer_;
+  std::unique_ptr<RenderDelayController> delay_controller_;
+  std::unique_ptr<EchoRemover> echo_remover_;
+  BlockProcessorMetrics metrics_;
+  RenderDelayBuffer::BufferingEvent render_event_;
+  size_t capture_call_counter_ = 0;
+  absl::optional<DelayEstimate> estimated_delay_;
+};
+
+std::atomic<int> BlockProcessorImpl::instance_count_(0);
+
+BlockProcessorImpl::BlockProcessorImpl(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz,
+    size_t num_render_channels,
+    size_t num_capture_channels,
+    std::unique_ptr<RenderDelayBuffer> render_buffer,
+    std::unique_ptr<RenderDelayController> delay_controller,
+    std::unique_ptr<EchoRemover> echo_remover)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      config_(config),
+      sample_rate_hz_(sample_rate_hz),
+      render_buffer_(std::move(render_buffer)),
+      delay_controller_(std::move(delay_controller)),
+      echo_remover_(std::move(echo_remover)),
+      render_event_(RenderDelayBuffer::BufferingEvent::kNone) {
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
+}
+
+BlockProcessorImpl::~BlockProcessorImpl() = default;
+
+void BlockProcessorImpl::ProcessCapture(bool echo_path_gain_change,
+                                        bool capture_signal_saturation,
+                                        Block* linear_output,
+                                        Block* capture_block) {
+  RTC_DCHECK(capture_block);
+  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->NumBands());
+
+  capture_call_counter_++;
+
+  data_dumper_->DumpRaw("aec3_processblock_call_order",
+                        static_cast<int>(BlockProcessorApiCall::kCapture));
+  data_dumper_->DumpWav("aec3_processblock_capture_input",
+                        capture_block->View(/*band=*/0, /*channel=*/0), 16000,
+                        1);
+
+  if (render_properly_started_) {
+    if
(!capture_properly_started_) { + capture_properly_started_ = true; + render_buffer_->Reset(); + if (delay_controller_) + delay_controller_->Reset(true); + } + } else { + // If no render data has yet arrived, do not process the capture signal. + render_buffer_->HandleSkippedCaptureProcessing(); + return; + } + + EchoPathVariability echo_path_variability( + echo_path_gain_change, EchoPathVariability::DelayAdjustment::kNone, + false); + + if (render_event_ == RenderDelayBuffer::BufferingEvent::kRenderOverrun && + render_properly_started_) { + echo_path_variability.delay_change = + EchoPathVariability::DelayAdjustment::kBufferFlush; + if (delay_controller_) + delay_controller_->Reset(true); + RTC_LOG(LS_WARNING) << "Reset due to render buffer overrun at block " + << capture_call_counter_; + } + render_event_ = RenderDelayBuffer::BufferingEvent::kNone; + + // Update the render buffers with any newly arrived render blocks and prepare + // the render buffers for reading the render data corresponding to the current + // capture block. + RenderDelayBuffer::BufferingEvent buffer_event = + render_buffer_->PrepareCaptureProcessing(); + // Reset the delay controller at render buffer underrun. + if (buffer_event == RenderDelayBuffer::BufferingEvent::kRenderUnderrun) { + if (delay_controller_) + delay_controller_->Reset(false); + } + + data_dumper_->DumpWav("aec3_processblock_capture_input2", + capture_block->View(/*band=*/0, /*channel=*/0), 16000, + 1); + + bool has_delay_estimator = !config_.delay.use_external_delay_estimator; + if (has_delay_estimator) { + RTC_DCHECK(delay_controller_); + // Compute and apply the render delay required to achieve proper signal + // alignment. + estimated_delay_ = delay_controller_->GetDelay( + render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(), + *capture_block); + + if (estimated_delay_) { + bool delay_change = + render_buffer_->AlignFromDelay(estimated_delay_->delay); + if (delay_change) { + rtc::LoggingSeverity log_level = + config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING + : rtc::LS_INFO; + RTC_LOG_V(log_level) << "Delay changed to " << estimated_delay_->delay + << " at block " << capture_call_counter_; + echo_path_variability.delay_change = + EchoPathVariability::DelayAdjustment::kNewDetectedDelay; + } + } + + echo_path_variability.clock_drift = delay_controller_->HasClockdrift(); + + } else { + render_buffer_->AlignFromExternalDelay(); + } + + // Remove the echo from the capture signal. + if (has_delay_estimator || render_buffer_->HasReceivedBufferDelay()) { + echo_remover_->ProcessCapture( + echo_path_variability, capture_signal_saturation, estimated_delay_, + render_buffer_->GetRenderBuffer(), linear_output, capture_block); + } + + // Update the metrics. 
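+  // (The underrun flag is always false on this capture path; render buffering
+  // irregularities are instead reported via UpdateRender() in BufferRender().)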
+  metrics_.UpdateCapture(false);
+}
+
+void BlockProcessorImpl::BufferRender(const Block& block) {
+  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.NumBands());
+  data_dumper_->DumpRaw("aec3_processblock_call_order",
+                        static_cast<int>(BlockProcessorApiCall::kRender));
+  data_dumper_->DumpWav("aec3_processblock_render_input",
+                        block.View(/*band=*/0, /*channel=*/0), 16000, 1);
+
+  render_event_ = render_buffer_->Insert(block);
+
+  metrics_.UpdateRender(render_event_ !=
+                        RenderDelayBuffer::BufferingEvent::kNone);
+
+  render_properly_started_ = true;
+  if (delay_controller_)
+    delay_controller_->LogRenderCall();
+}
+
+void BlockProcessorImpl::UpdateEchoLeakageStatus(bool leakage_detected) {
+  echo_remover_->UpdateEchoLeakageStatus(leakage_detected);
+}
+
+void BlockProcessorImpl::GetMetrics(EchoControl::Metrics* metrics) const {
+  echo_remover_->GetMetrics(metrics);
+  constexpr int block_size_ms = 4;
+  absl::optional<size_t> delay = render_buffer_->Delay();
+  metrics->delay_ms = delay ? static_cast<int>(*delay) * block_size_ms : 0;
+}
+
+void BlockProcessorImpl::SetAudioBufferDelay(int delay_ms) {
+  render_buffer_->SetAudioBufferDelay(delay_ms);
+}
+
+void BlockProcessorImpl::SetCaptureOutputUsage(bool capture_output_used) {
+  echo_remover_->SetCaptureOutputUsage(capture_output_used);
+}
+
+}  // namespace
+
+BlockProcessor* BlockProcessor::Create(const EchoCanceller3Config& config,
+                                       int sample_rate_hz,
+                                       size_t num_render_channels,
+                                       size_t num_capture_channels) {
+  std::unique_ptr<RenderDelayBuffer> render_buffer(
+      RenderDelayBuffer::Create(config, sample_rate_hz, num_render_channels));
+  std::unique_ptr<RenderDelayController> delay_controller;
+  if (!config.delay.use_external_delay_estimator) {
+    delay_controller.reset(RenderDelayController::Create(config, sample_rate_hz,
+                                                         num_capture_channels));
+  }
+  std::unique_ptr<EchoRemover> echo_remover(EchoRemover::Create(
+      config, sample_rate_hz, num_render_channels, num_capture_channels));
+  return Create(config, sample_rate_hz, num_render_channels,
+                num_capture_channels, std::move(render_buffer),
+                std::move(delay_controller), std::move(echo_remover));
+}
+
+BlockProcessor* BlockProcessor::Create(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz,
+    size_t num_render_channels,
+    size_t num_capture_channels,
+    std::unique_ptr<RenderDelayBuffer> render_buffer) {
+  std::unique_ptr<RenderDelayController> delay_controller;
+  if (!config.delay.use_external_delay_estimator) {
+    delay_controller.reset(RenderDelayController::Create(config, sample_rate_hz,
+                                                         num_capture_channels));
+  }
+  std::unique_ptr<EchoRemover> echo_remover(EchoRemover::Create(
+      config, sample_rate_hz, num_render_channels, num_capture_channels));
+  return Create(config, sample_rate_hz, num_render_channels,
+                num_capture_channels, std::move(render_buffer),
+                std::move(delay_controller), std::move(echo_remover));
+}
+
+BlockProcessor* BlockProcessor::Create(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz,
+    size_t num_render_channels,
+    size_t num_capture_channels,
+    std::unique_ptr<RenderDelayBuffer> render_buffer,
+    std::unique_ptr<RenderDelayController> delay_controller,
+    std::unique_ptr<EchoRemover> echo_remover) {
+  return new BlockProcessorImpl(config, sample_rate_hz, num_render_channels,
+                                num_capture_channels, std::move(render_buffer),
+                                std::move(delay_controller),
+                                std::move(echo_remover));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.h
new file mode 100644
index 0000000000..01a83ae5f7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor.h
@@ -0,0
+1,81 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_ + +#include + +#include +#include + +#include "api/audio/echo_canceller3_config.h" +#include "api/audio/echo_control.h" +#include "modules/audio_processing/aec3/block.h" +#include "modules/audio_processing/aec3/echo_remover.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/render_delay_controller.h" + +namespace webrtc { + +// Class for performing echo cancellation on 64 sample blocks of audio data. +class BlockProcessor { + public: + static BlockProcessor* Create(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels); + // Only used for testing purposes. + static BlockProcessor* Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels, + std::unique_ptr render_buffer); + static BlockProcessor* Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels, + std::unique_ptr render_buffer, + std::unique_ptr delay_controller, + std::unique_ptr echo_remover); + + virtual ~BlockProcessor() = default; + + // Get current metrics. + virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0; + + // Provides an optional external estimate of the audio buffer delay. + virtual void SetAudioBufferDelay(int delay_ms) = 0; + + // Processes a block of capture data. + virtual void ProcessCapture(bool echo_path_gain_change, + bool capture_signal_saturation, + Block* linear_output, + Block* capture_block) = 0; + + // Buffers a block of render data supplied by a FrameBlocker object. + virtual void BufferRender(const Block& render_block) = 0; + + // Reports whether echo leakage has been detected in the echo canceller + // output. + virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0; + + // Specifies whether the capture output will be used. The purpose of this is + // to allow the block processor to deactivate some of the processing when the + // resulting output is anyway not used, for instance when the endpoint is + // muted. + virtual void SetCaptureOutputUsage(bool capture_output_used) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc new file mode 100644 index 0000000000..deac1fcd22 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_processor_metrics.h" + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +enum class RenderUnderrunCategory { + kNone, + kFew, + kSeveral, + kMany, + kConstant, + kNumCategories +}; + +enum class RenderOverrunCategory { + kNone, + kFew, + kSeveral, + kMany, + kConstant, + kNumCategories +}; + +} // namespace + +void BlockProcessorMetrics::UpdateCapture(bool underrun) { + ++capture_block_counter_; + if (underrun) { + ++render_buffer_underruns_; + } + + if (capture_block_counter_ == kMetricsReportingIntervalBlocks) { + metrics_reported_ = true; + + RenderUnderrunCategory underrun_category; + if (render_buffer_underruns_ == 0) { + underrun_category = RenderUnderrunCategory::kNone; + } else if (render_buffer_underruns_ > (capture_block_counter_ >> 1)) { + underrun_category = RenderUnderrunCategory::kConstant; + } else if (render_buffer_underruns_ > 100) { + underrun_category = RenderUnderrunCategory::kMany; + } else if (render_buffer_underruns_ > 10) { + underrun_category = RenderUnderrunCategory::kSeveral; + } else { + underrun_category = RenderUnderrunCategory::kFew; + } + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.EchoCanceller.RenderUnderruns", + static_cast(underrun_category), + static_cast(RenderUnderrunCategory::kNumCategories)); + + RenderOverrunCategory overrun_category; + if (render_buffer_overruns_ == 0) { + overrun_category = RenderOverrunCategory::kNone; + } else if (render_buffer_overruns_ > (buffer_render_calls_ >> 1)) { + overrun_category = RenderOverrunCategory::kConstant; + } else if (render_buffer_overruns_ > 100) { + overrun_category = RenderOverrunCategory::kMany; + } else if (render_buffer_overruns_ > 10) { + overrun_category = RenderOverrunCategory::kSeveral; + } else { + overrun_category = RenderOverrunCategory::kFew; + } + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.EchoCanceller.RenderOverruns", + static_cast(overrun_category), + static_cast(RenderOverrunCategory::kNumCategories)); + + ResetMetrics(); + capture_block_counter_ = 0; + } else { + metrics_reported_ = false; + } +} + +void BlockProcessorMetrics::UpdateRender(bool overrun) { + ++buffer_render_calls_; + if (overrun) { + ++render_buffer_overruns_; + } +} + +void BlockProcessorMetrics::ResetMetrics() { + render_buffer_underruns_ = 0; + render_buffer_overruns_ = 0; + buffer_render_calls_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.h b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.h new file mode 100644 index 0000000000..a70d0dac5b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_ + +namespace webrtc { + +// Handles the reporting of metrics for the block_processor. +class BlockProcessorMetrics { + public: + BlockProcessorMetrics() = default; + + BlockProcessorMetrics(const BlockProcessorMetrics&) = delete; + BlockProcessorMetrics& operator=(const BlockProcessorMetrics&) = delete; + + // Updates the metric with new capture data. + void UpdateCapture(bool underrun); + + // Updates the metric with new render data. + void UpdateRender(bool overrun); + + // Returns true if the metrics have just been reported, otherwise false. + bool MetricsReported() { return metrics_reported_; } + + private: + // Resets the metrics. + void ResetMetrics(); + + int capture_block_counter_ = 0; + bool metrics_reported_ = false; + int render_buffer_underruns_ = 0; + int render_buffer_overruns_ = 0; + int buffer_render_calls_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc new file mode 100644 index 0000000000..3e23c2499d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_processor_metrics.h" + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "test/gtest.h" + +namespace webrtc { + +// Verify the general functionality of BlockProcessorMetrics. +TEST(BlockProcessorMetrics, NormalUsage) { + BlockProcessorMetrics metrics; + + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < kMetricsReportingIntervalBlocks - 1; ++k) { + metrics.UpdateRender(false); + metrics.UpdateRender(false); + metrics.UpdateCapture(false); + EXPECT_FALSE(metrics.MetricsReported()); + } + metrics.UpdateCapture(false); + EXPECT_TRUE(metrics.MetricsReported()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_unittest.cc new file mode 100644 index 0000000000..aba5c4186d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/block_processor_unittest.cc @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/block_processor.h" + +#include +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/mock/mock_echo_remover.h" +#include "modules/audio_processing/aec3/mock/mock_render_delay_buffer.h" +#include "modules/audio_processing/aec3/mock/mock_render_delay_controller.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/checks.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::_; +using ::testing::AtLeast; +using ::testing::NiceMock; +using ::testing::Return; +using ::testing::StrictMock; + +// Verifies that the basic BlockProcessor functionality works and that the API +// methods are callable. +void RunBasicSetupAndApiCallTest(int sample_rate_hz, int num_iterations) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + + std::unique_ptr block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz, + kNumRenderChannels, kNumCaptureChannels)); + Block block(NumBandsForRate(sample_rate_hz), kNumRenderChannels, 1000.f); + for (int k = 0; k < num_iterations; ++k) { + block_processor->BufferRender(block); + block_processor->ProcessCapture(false, false, nullptr, &block); + block_processor->UpdateEchoLeakageStatus(false); + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +void RunRenderBlockSizeVerificationTest(int sample_rate_hz) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + + std::unique_ptr block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz, + kNumRenderChannels, kNumCaptureChannels)); + Block block(NumBandsForRate(sample_rate_hz), kNumRenderChannels); + + EXPECT_DEATH(block_processor->BufferRender(block), ""); +} + +void RunRenderNumBandsVerificationTest(int sample_rate_hz) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + + const size_t wrong_num_bands = NumBandsForRate(sample_rate_hz) < 3 + ? NumBandsForRate(sample_rate_hz) + 1 + : 1; + std::unique_ptr block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz, + kNumRenderChannels, kNumCaptureChannels)); + Block block(wrong_num_bands, kNumRenderChannels); + + EXPECT_DEATH(block_processor->BufferRender(block), ""); +} + +void RunCaptureNumBandsVerificationTest(int sample_rate_hz) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + + const size_t wrong_num_bands = NumBandsForRate(sample_rate_hz) < 3 + ? 
NumBandsForRate(sample_rate_hz) + 1 + : 1; + std::unique_ptr block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz, + kNumRenderChannels, kNumCaptureChannels)); + Block block(wrong_num_bands, kNumRenderChannels); + + EXPECT_DEATH(block_processor->ProcessCapture(false, false, nullptr, &block), + ""); +} +#endif + +std::string ProduceDebugText(int sample_rate_hz) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.Release(); +} + +void FillSampleVector(int call_counter, + int delay, + rtc::ArrayView samples) { + for (size_t i = 0; i < samples.size(); ++i) { + samples[i] = (call_counter - delay) * 10000.0f + i; + } +} + +} // namespace + +// Verifies that the delay controller functionality is properly integrated with +// the render delay buffer inside block processor. +// TODO(peah): Activate the unittest once the required code has been landed. +TEST(BlockProcessor, DISABLED_DelayControllerIntegration) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + constexpr size_t kNumBlocks = 310; + constexpr size_t kDelayInSamples = 640; + constexpr size_t kDelayHeadroom = 1; + constexpr size_t kDelayInBlocks = + kDelayInSamples / kBlockSize - kDelayHeadroom; + Random random_generator(42U); + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr> + render_delay_buffer_mock( + new StrictMock(rate, 1)); + EXPECT_CALL(*render_delay_buffer_mock, Insert(_)) + .Times(kNumBlocks) + .WillRepeatedly(Return(RenderDelayBuffer::BufferingEvent::kNone)); + EXPECT_CALL(*render_delay_buffer_mock, AlignFromDelay(kDelayInBlocks)) + .Times(AtLeast(1)); + EXPECT_CALL(*render_delay_buffer_mock, MaxDelay()).WillOnce(Return(30)); + EXPECT_CALL(*render_delay_buffer_mock, Delay()) + .Times(kNumBlocks + 1) + .WillRepeatedly(Return(0)); + std::unique_ptr block_processor(BlockProcessor::Create( + EchoCanceller3Config(), rate, kNumRenderChannels, kNumCaptureChannels, + std::move(render_delay_buffer_mock))); + + Block render_block(NumBandsForRate(rate), kNumRenderChannels); + Block capture_block(NumBandsForRate(rate), kNumCaptureChannels); + DelayBuffer signal_delay_buffer(kDelayInSamples); + for (size_t k = 0; k < kNumBlocks; ++k) { + RandomizeSampleVector(&random_generator, + render_block.View(/*band=*/0, /*capture=*/0)); + signal_delay_buffer.Delay(render_block.View(/*band=*/0, /*capture=*/0), + capture_block.View(/*band=*/0, /*capture=*/0)); + block_processor->BufferRender(render_block); + block_processor->ProcessCapture(false, false, nullptr, &capture_block); + } + } +} + +// Verifies that BlockProcessor submodules are called in a proper manner. 
+TEST(BlockProcessor, DISABLED_SubmoduleIntegration) {
+  constexpr size_t kNumBlocks = 310;
+  constexpr size_t kNumRenderChannels = 1;
+  constexpr size_t kNumCaptureChannels = 1;
+
+  Random random_generator(42U);
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    std::unique_ptr<::testing::StrictMock<webrtc::test::MockRenderDelayBuffer>>
+        render_delay_buffer_mock(
+            new StrictMock<webrtc::test::MockRenderDelayBuffer>(rate, 1));
+    std::unique_ptr<
+        ::testing::StrictMock<webrtc::test::MockRenderDelayController>>
+        render_delay_controller_mock(
+            new StrictMock<webrtc::test::MockRenderDelayController>());
+    std::unique_ptr<::testing::StrictMock<webrtc::test::MockEchoRemover>>
+        echo_remover_mock(new StrictMock<webrtc::test::MockEchoRemover>());
+
+    EXPECT_CALL(*render_delay_buffer_mock, Insert(_))
+        .Times(kNumBlocks - 1)
+        .WillRepeatedly(Return(RenderDelayBuffer::BufferingEvent::kNone));
+    EXPECT_CALL(*render_delay_buffer_mock, PrepareCaptureProcessing())
+        .Times(kNumBlocks);
+    EXPECT_CALL(*render_delay_buffer_mock, AlignFromDelay(9)).Times(AtLeast(1));
+    EXPECT_CALL(*render_delay_buffer_mock, Delay())
+        .Times(kNumBlocks)
+        .WillRepeatedly(Return(0));
+    EXPECT_CALL(*render_delay_controller_mock, GetDelay(_, _, _))
+        .Times(kNumBlocks);
+    EXPECT_CALL(*echo_remover_mock, ProcessCapture(_, _, _, _, _, _))
+        .Times(kNumBlocks);
+    EXPECT_CALL(*echo_remover_mock, UpdateEchoLeakageStatus(_))
+        .Times(kNumBlocks);
+
+    std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create(
+        EchoCanceller3Config(), rate, kNumRenderChannels, kNumCaptureChannels,
+        std::move(render_delay_buffer_mock),
+        std::move(render_delay_controller_mock), std::move(echo_remover_mock)));
+
+    Block render_block(NumBandsForRate(rate), kNumRenderChannels);
+    Block capture_block(NumBandsForRate(rate), kNumCaptureChannels);
+    DelayBuffer<float> signal_delay_buffer(640);
+    for (size_t k = 0; k < kNumBlocks; ++k) {
+      RandomizeSampleVector(&random_generator,
+                            render_block.View(/*band=*/0, /*capture=*/0));
+      signal_delay_buffer.Delay(render_block.View(/*band=*/0, /*capture=*/0),
+                                capture_block.View(/*band=*/0, /*capture=*/0));
+      block_processor->BufferRender(render_block);
+      block_processor->ProcessCapture(false, false, nullptr, &capture_block);
+      block_processor->UpdateEchoLeakageStatus(false);
+    }
+  }
+}
+
+TEST(BlockProcessor, BasicSetupAndApiCalls) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    RunBasicSetupAndApiCallTest(rate, 1);
+  }
+}
+
+TEST(BlockProcessor, TestLongerCall) {
+  RunBasicSetupAndApiCallTest(16000, 20 * kNumBlocksPerSecond);
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// TODO(gustaf): Re-enable the test once the issue with memory leaks during
+// DEATH tests on test bots has been fixed.
+TEST(BlockProcessorDeathTest, DISABLED_VerifyRenderBlockSizeCheck) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    RunRenderBlockSizeVerificationTest(rate);
+  }
+}
+
+TEST(BlockProcessorDeathTest, VerifyRenderNumBandsCheck) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    RunRenderNumBandsVerificationTest(rate);
+  }
+}
+
+// TODO(peah): Verify the check for correct number of bands in the capture
+// signal.
+TEST(BlockProcessorDeathTest, VerifyCaptureNumBandsCheck) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    RunCaptureNumBandsVerificationTest(rate);
+  }
+}
+
+// Verifies that the check for a null ProcessCapture input works.
+TEST(BlockProcessorDeathTest, NullProcessCaptureParameter) { + EXPECT_DEATH(std::unique_ptr( + BlockProcessor::Create(EchoCanceller3Config(), 16000, 1, 1)) + ->ProcessCapture(false, false, nullptr, nullptr), + ""); +} + +// Verifies the check for correct sample rate. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. +TEST(BlockProcessor, DISABLED_WrongSampleRate) { + EXPECT_DEATH(std::unique_ptr( + BlockProcessor::Create(EchoCanceller3Config(), 8001, 1, 1)), + ""); +} + +#endif + +// Verifies that external delay estimator delays are applied correctly when a +// call begins with a sequence of capture blocks. +TEST(BlockProcessor, ExternalDelayAppliedCorrectlyWithInitialCaptureCalls) { + constexpr int kNumRenderChannels = 1; + constexpr int kNumCaptureChannels = 1; + constexpr int kSampleRateHz = 16000; + + EchoCanceller3Config config; + config.delay.use_external_delay_estimator = true; + + std::unique_ptr delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels)); + + std::unique_ptr> + echo_remover_mock(new NiceMock()); + webrtc::test::MockEchoRemover* echo_remover_mock_pointer = + echo_remover_mock.get(); + + std::unique_ptr block_processor(BlockProcessor::Create( + config, kSampleRateHz, kNumRenderChannels, kNumCaptureChannels, + std::move(delay_buffer), /*delay_controller=*/nullptr, + std::move(echo_remover_mock))); + + Block render_block(NumBandsForRate(kSampleRateHz), kNumRenderChannels); + Block capture_block(NumBandsForRate(kSampleRateHz), kNumCaptureChannels); + + // Process... + // - 10 capture calls, where no render data is available, + // - 10 render calls, populating the buffer, + // - 2 capture calls, verifying that the delay was applied correctly. 
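+  // At 16 kHz one block is kBlockSize = 64 samples, i.e. 4 ms, so the 20 ms
+  // buffer delay set below corresponds to the expected 5 blocks.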
+ constexpr int kDelayInBlocks = 5; + constexpr int kDelayInMs = 20; + block_processor->SetAudioBufferDelay(kDelayInMs); + + int capture_call_counter = 0; + int render_call_counter = 0; + for (size_t k = 0; k < 10; ++k) { + FillSampleVector(++capture_call_counter, kDelayInBlocks, + capture_block.View(/*band=*/0, /*capture=*/0)); + block_processor->ProcessCapture(false, false, nullptr, &capture_block); + } + for (size_t k = 0; k < 10; ++k) { + FillSampleVector(++render_call_counter, 0, + render_block.View(/*band=*/0, /*capture=*/0)); + block_processor->BufferRender(render_block); + } + + EXPECT_CALL(*echo_remover_mock_pointer, ProcessCapture) + .WillRepeatedly( + [](EchoPathVariability /*echo_path_variability*/, + bool /*capture_signal_saturation*/, + const absl::optional& /*external_delay*/, + RenderBuffer* render_buffer, Block* /*linear_output*/, + Block* capture) { + const auto& render = render_buffer->GetBlock(0); + const auto render_view = render.View(/*band=*/0, /*channel=*/0); + const auto capture_view = capture->View(/*band=*/0, /*channel=*/0); + for (size_t i = 0; i < kBlockSize; ++i) { + EXPECT_FLOAT_EQ(render_view[i], capture_view[i]); + } + }); + + FillSampleVector(++capture_call_counter, kDelayInBlocks, + capture_block.View(/*band=*/0, /*capture=*/0)); + block_processor->ProcessCapture(false, false, nullptr, &capture_block); + + FillSampleVector(++capture_call_counter, kDelayInBlocks, + capture_block.View(/*band=*/0, /*capture=*/0)); + block_processor->ProcessCapture(false, false, nullptr, &capture_block); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc new file mode 100644 index 0000000000..2c49b795c4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/clockdrift_detector.h" + +namespace webrtc { + +ClockdriftDetector::ClockdriftDetector() + : level_(Level::kNone), stability_counter_(0) { + delay_history_.fill(0); +} + +ClockdriftDetector::~ClockdriftDetector() = default; + +void ClockdriftDetector::Update(int delay_estimate) { + if (delay_estimate == delay_history_[0]) { + // Reset clockdrift level if delay estimate is stable for 7500 blocks (30 + // seconds). + if (++stability_counter_ > 7500) + level_ = Level::kNone; + return; + } + + stability_counter_ = 0; + const int d1 = delay_history_[0] - delay_estimate; + const int d2 = delay_history_[1] - delay_estimate; + const int d3 = delay_history_[2] - delay_estimate; + + // Patterns recognized as positive clockdrift: + // [x-3], x-2, x-1, x. + // [x-3], x-1, x-2, x. + const bool probable_drift_up = + (d1 == -1 && d2 == -2) || (d1 == -2 && d2 == -1); + const bool drift_up = probable_drift_up && d3 == -3; + + // Patterns recognized as negative clockdrift: + // [x+3], x+2, x+1, x. + // [x+3], x+1, x+2, x. + const bool probable_drift_down = (d1 == 1 && d2 == 2) || (d1 == 2 && d2 == 1); + const bool drift_down = probable_drift_down && d3 == 3; + + // Set clockdrift level. 
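+  // A complete four-delay pattern counts as verified clockdrift, whereas a
+  // three-delay pattern only raises the level from kNone to kProbable; an
+  // already detected level is never downgraded here (only the stability
+  // counter above can reset it).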
+  if (drift_up || drift_down) {
+    level_ = Level::kVerified;
+  } else if ((probable_drift_up || probable_drift_down) &&
+             level_ == Level::kNone) {
+    level_ = Level::kProbable;
+  }
+
+  // Shift delay history one step.
+  delay_history_[2] = delay_history_[1];
+  delay_history_[1] = delay_history_[0];
+  delay_history_[0] = delay_estimate;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.h b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.h
new file mode 100644
index 0000000000..2ba90bb889
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector.h
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
+
+#include <stddef.h>
+
+#include <array>
+
+namespace webrtc {
+
+class ApmDataDumper;
+struct DownsampledRenderBuffer;
+struct EchoCanceller3Config;
+
+// Detects clockdrift by analyzing the estimated delay.
+class ClockdriftDetector {
+ public:
+  enum class Level { kNone, kProbable, kVerified, kNumCategories };
+  ClockdriftDetector();
+  ~ClockdriftDetector();
+  void Update(int delay_estimate);
+  Level ClockdriftLevel() const { return level_; }
+
+ private:
+  std::array<int, 3> delay_history_;
+  Level level_;
+  size_t stability_counter_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector_unittest.cc
new file mode 100644
index 0000000000..0f98b01d3a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/clockdrift_detector_unittest.cc
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/clockdrift_detector.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+TEST(ClockdriftDetector, ClockdriftDetector) {
+  ClockdriftDetector c;
+  // No clockdrift at start.
+  EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kNone);
+
+  // Monotonically increasing delay.
+  for (int i = 0; i < 100; i++)
+    c.Update(1000);
+  EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kNone);
+  for (int i = 0; i < 100; i++)
+    c.Update(1001);
+  EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kNone);
+  for (int i = 0; i < 100; i++)
+    c.Update(1002);
+  // Probable clockdrift.
+  EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kProbable);
+  for (int i = 0; i < 100; i++)
+    c.Update(1003);
+  // Verified clockdrift.
+  EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kVerified);
+
+  // Stable delay.
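+  // 10000 stable updates exceed the 7500-block threshold in Update(), which
+  // resets the level to kNone.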
+ for (int i = 0; i < 10000; i++) + c.Update(1003); + // No clockdrift. + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kNone); + + // Decreasing delay. + for (int i = 0; i < 100; i++) + c.Update(1001); + for (int i = 0; i < 100; i++) + c.Update(999); + // Probable clockdrift. + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kProbable); + for (int i = 0; i < 100; i++) + c.Update(1000); + for (int i = 0; i < 100; i++) + c.Update(998); + // Verified clockdrift. + EXPECT_TRUE(c.ClockdriftLevel() == ClockdriftDetector::Level::kVerified); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc new file mode 100644 index 0000000000..f4fb74d20d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/coarse_filter_update_gain.h" + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +CoarseFilterUpdateGain::CoarseFilterUpdateGain( + const EchoCanceller3Config::Filter::CoarseConfiguration& config, + size_t config_change_duration_blocks) + : config_change_duration_blocks_( + static_cast(config_change_duration_blocks)) { + SetConfig(config, true); + RTC_DCHECK_LT(0, config_change_duration_blocks_); + one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_; +} + +void CoarseFilterUpdateGain::HandleEchoPathChange() { + poor_signal_excitation_counter_ = 0; + call_counter_ = 0; +} + +void CoarseFilterUpdateGain::Compute( + const std::array& render_power, + const RenderSignalAnalyzer& render_signal_analyzer, + const FftData& E_coarse, + size_t size_partitions, + bool saturated_capture_signal, + FftData* G) { + RTC_DCHECK(G); + ++call_counter_; + + UpdateCurrentConfig(); + + if (render_signal_analyzer.PoorSignalExcitation()) { + poor_signal_excitation_counter_ = 0; + } + + // Do not update the filter if the render is not sufficiently excited. + if (++poor_signal_excitation_counter_ < size_partitions || + saturated_capture_signal || call_counter_ <= size_partitions) { + G->re.fill(0.f); + G->im.fill(0.f); + return; + } + + // Compute mu. + std::array mu; + const auto& X2 = render_power; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (X2[k] > current_config_.noise_gate) { + mu[k] = current_config_.rate / X2[k]; + } else { + mu[k] = 0.f; + } + } + + // Avoid updating the filter close to narrow bands in the render signals. + render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu); + + // G = mu * E * X2. 
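+  // Since mu was set to rate / X2 above, this amounts to the NLMS-style
+  // update G = rate * E / X2, with G = 0 in bands where the noise gate
+  // zeroed mu.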
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + G->re[k] = mu[k] * E_coarse.re[k]; + G->im[k] = mu[k] * E_coarse.im[k]; + } +} + +void CoarseFilterUpdateGain::UpdateCurrentConfig() { + RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_); + if (config_change_counter_ > 0) { + if (--config_change_counter_ > 0) { + auto average = [](float from, float to, float from_weight) { + return from * from_weight + to * (1.f - from_weight); + }; + + float change_factor = + config_change_counter_ * one_by_config_change_duration_blocks_; + + current_config_.rate = + average(old_target_config_.rate, target_config_.rate, change_factor); + current_config_.noise_gate = + average(old_target_config_.noise_gate, target_config_.noise_gate, + change_factor); + } else { + current_config_ = old_target_config_ = target_config_; + } + } + RTC_DCHECK_LE(0, config_change_counter_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.h b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.h new file mode 100644 index 0000000000..a1a1399b2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_ + +#include + +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" + +namespace webrtc { + +// Provides functionality for computing the fixed gain for the coarse filter. +class CoarseFilterUpdateGain { + public: + explicit CoarseFilterUpdateGain( + const EchoCanceller3Config::Filter::CoarseConfiguration& config, + size_t config_change_duration_blocks); + + // Takes action in the case of a known echo path change. + void HandleEchoPathChange(); + + // Computes the gain. + void Compute(const std::array& render_power, + const RenderSignalAnalyzer& render_signal_analyzer, + const FftData& E_coarse, + size_t size_partitions, + bool saturated_capture_signal, + FftData* G); + + // Sets a new config. + void SetConfig( + const EchoCanceller3Config::Filter::CoarseConfiguration& config, + bool immediate_effect) { + if (immediate_effect) { + old_target_config_ = current_config_ = target_config_ = config; + config_change_counter_ = 0; + } else { + old_target_config_ = current_config_; + target_config_ = config; + config_change_counter_ = config_change_duration_blocks_; + } + } + + private: + EchoCanceller3Config::Filter::CoarseConfiguration current_config_; + EchoCanceller3Config::Filter::CoarseConfiguration target_config_; + EchoCanceller3Config::Filter::CoarseConfiguration old_target_config_; + const int config_change_duration_blocks_; + float one_by_config_change_duration_blocks_; + // TODO(peah): Check whether this counter should instead be initialized to a + // large value. 
+ size_t poor_signal_excitation_counter_ = 0; + size_t call_counter_ = 0; + int config_change_counter_ = 0; + + void UpdateCurrentConfig(); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain_unittest.cc new file mode 100644 index 0000000000..55b79bb812 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/coarse_filter_update_gain_unittest.cc @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/coarse_filter_update_gain.h" + +#include +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { +// Method for performing the simulations needed to test the refined filter +// update gain functionality. +void RunFilterUpdateTest(int num_blocks_to_process, + size_t delay_samples, + size_t num_render_channels, + int filter_length_blocks, + const std::vector& blocks_with_saturation, + std::array* e_last_block, + std::array* y_last_block, + FftData* G_last_block) { + ApmDataDumper data_dumper(42); + EchoCanceller3Config config; + config.filter.refined.length_blocks = filter_length_blocks; + AdaptiveFirFilter refined_filter( + config.filter.refined.length_blocks, config.filter.refined.length_blocks, + config.filter.config_change_duration_blocks, num_render_channels, + DetectOptimization(), &data_dumper); + AdaptiveFirFilter coarse_filter( + config.filter.coarse.length_blocks, config.filter.coarse.length_blocks, + config.filter.config_change_duration_blocks, num_render_channels, + DetectOptimization(), &data_dumper); + Aec3Fft fft; + + constexpr int kSampleRateHz = 48000; + config.delay.default_delay = 1; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); + + CoarseFilterUpdateGain coarse_gain( + config.filter.coarse, config.filter.config_change_duration_blocks); + Random random_generator(42U); + Block x(NumBandsForRate(kSampleRateHz), num_render_channels); + std::array y; + RenderSignalAnalyzer render_signal_analyzer(config); + std::array s; + FftData S; + FftData G; + FftData E_coarse; + std::array e_coarse; + + constexpr float kScale = 1.0f / kFftLengthBy2; + + DelayBuffer delay_buffer(delay_samples); + for (int k = 0; k < num_blocks_to_process; ++k) { + // Handle saturation. + bool saturation = + std::find(blocks_with_saturation.begin(), blocks_with_saturation.end(), + k) != blocks_with_saturation.end(); + + // Create the render signal. 
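+    // (White noise in each band and channel; the capture signal y is the
+    // band-0 render signal delayed by delay_samples.)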
+ for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + RandomizeSampleVector(&random_generator, x.View(band, channel)); + } + } + delay_buffer.Delay(x.View(/*band=*/0, /*channel*/ 0), y); + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + + render_signal_analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + delay_samples / kBlockSize); + + coarse_filter.Filter(*render_delay_buffer->GetRenderBuffer(), &S); + fft.Ifft(S, &s); + std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, + e_coarse.begin(), + [&](float a, float b) { return a - b * kScale; }); + std::for_each(e_coarse.begin(), e_coarse.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + fft.ZeroPaddedFft(e_coarse, Aec3Fft::Window::kRectangular, &E_coarse); + + std::array render_power; + render_delay_buffer->GetRenderBuffer()->SpectralSum( + coarse_filter.SizePartitions(), &render_power); + coarse_gain.Compute(render_power, render_signal_analyzer, E_coarse, + coarse_filter.SizePartitions(), saturation, &G); + coarse_filter.Adapt(*render_delay_buffer->GetRenderBuffer(), G); + } + + std::copy(e_coarse.begin(), e_coarse.end(), e_last_block->begin()); + std::copy(y.begin(), y.end(), y_last_block->begin()); + std::copy(G.re.begin(), G.re.end(), G_last_block->re.begin()); + std::copy(G.im.begin(), G.im.end(), G_last_block->im.begin()); +} + +std::string ProduceDebugText(int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "Length: " << filter_length_blocks; + return ss.Release(); +} + +std::string ProduceDebugText(size_t delay, int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "Delay: " << delay << ", "; + ss << ProduceDebugText(filter_length_blocks); + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null output gain parameter works. +TEST(CoarseFilterUpdateGainDeathTest, NullDataOutputGain) { + ApmDataDumper data_dumper(42); + FftBuffer fft_buffer(1, 1); + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + FftData E; + const EchoCanceller3Config::Filter::CoarseConfiguration& config = { + 12, 0.5f, 220075344.f}; + CoarseFilterUpdateGain gain(config, 250); + std::array render_power; + render_power.fill(0.f); + EXPECT_DEATH(gain.Compute(render_power, analyzer, E, 1, false, nullptr), ""); +} + +#endif + +class CoarseFilterUpdateGainOneTwoEightRenderChannels + : public ::testing::Test, + public ::testing::WithParamInterface {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + CoarseFilterUpdateGainOneTwoEightRenderChannels, + ::testing::Values(1, 2, 8)); + +// Verifies that the gain formed causes the filter using it to converge. +TEST_P(CoarseFilterUpdateGainOneTwoEightRenderChannels, + GainCausesFilterToConverge) { + const size_t num_render_channels = GetParam(); + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + + for (size_t filter_length_blocks : {12, 20, 30}) { + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(delay_samples, filter_length_blocks)); + + std::array e; + std::array y; + FftData G; + + RunFilterUpdateTest(5000, delay_samples, num_render_channels, + filter_length_blocks, blocks_with_saturation, &e, &y, + &G); + + // Verify that the refined filter is able to perform well. + // Use different criteria to take overmodelling into account. 
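+      // The shortest (12-block) filter models the echo path most tightly, so
+      // require roughly 30 dB of echo reduction (a factor of 1000 in power);
+      // the longer filters are over-modelled and only need to reduce the echo.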
+ if (filter_length_blocks == 12) { + EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } else { + EXPECT_LT(std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } + } + } +} + +// Verifies that the gain is zero when there is saturation. +TEST_P(CoarseFilterUpdateGainOneTwoEightRenderChannels, SaturationBehavior) { + const size_t num_render_channels = GetParam(); + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + for (int k = 99; k < 200; ++k) { + blocks_with_saturation.push_back(k); + } + for (size_t filter_length_blocks : {12, 20, 30}) { + SCOPED_TRACE(ProduceDebugText(filter_length_blocks)); + + std::array e; + std::array y; + FftData G_a; + FftData G_a_ref; + G_a_ref.re.fill(0.f); + G_a_ref.im.fill(0.f); + + RunFilterUpdateTest(100, 65, num_render_channels, filter_length_blocks, + blocks_with_saturation, &e, &y, &G_a); + + EXPECT_EQ(G_a_ref.re, G_a.re); + EXPECT_EQ(G_a_ref.im, G_a.im); + } +} + +class CoarseFilterUpdateGainOneTwoFourRenderChannels + : public ::testing::Test, + public ::testing::WithParamInterface {}; + +INSTANTIATE_TEST_SUITE_P( + MultiChannel, + CoarseFilterUpdateGainOneTwoFourRenderChannels, + ::testing::Values(1, 2, 4), + [](const ::testing::TestParamInfo< + CoarseFilterUpdateGainOneTwoFourRenderChannels::ParamType>& info) { + return (rtc::StringBuilder() << "Render" << info.param).str(); + }); + +// Verifies that the magnitude of the gain on average decreases for a +// persistently exciting signal. +TEST_P(CoarseFilterUpdateGainOneTwoFourRenderChannels, DecreasingGain) { + const size_t num_render_channels = GetParam(); + for (size_t filter_length_blocks : {12, 20, 30}) { + SCOPED_TRACE(ProduceDebugText(filter_length_blocks)); + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + + std::array e; + std::array y; + FftData G_a; + FftData G_b; + FftData G_c; + std::array G_a_power; + std::array G_b_power; + std::array G_c_power; + + RunFilterUpdateTest(100, 65, num_render_channels, filter_length_blocks, + blocks_with_saturation, &e, &y, &G_a); + RunFilterUpdateTest(200, 65, num_render_channels, filter_length_blocks, + blocks_with_saturation, &e, &y, &G_b); + RunFilterUpdateTest(300, 65, num_render_channels, filter_length_blocks, + blocks_with_saturation, &e, &y, &G_c); + + G_a.Spectrum(Aec3Optimization::kNone, G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, G_b_power); + G_c.Spectrum(Aec3Optimization::kNone, G_c_power); + + EXPECT_GT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); + + EXPECT_GT(std::accumulate(G_b_power.begin(), G_b_power.end(), 0.), + std::accumulate(G_c_power.begin(), G_c_power.end(), 0.)); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc new file mode 100644 index 0000000000..de5227c089 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.cc @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/comfort_noise_generator.h" + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif +#include +#include +#include +#include +#include +#include + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/aec3/vector_math.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Computes the noise floor value that matches a WGN input of noise_floor_dbfs. +float GetNoiseFloorFactor(float noise_floor_dbfs) { + // kdBfsNormalization = 20.f*log10(32768.f). + constexpr float kdBfsNormalization = 90.30899869919436f; + return 64.f * powf(10.f, (kdBfsNormalization + noise_floor_dbfs) * 0.1f); +} + +// Table of sqrt(2) * sin(2*pi*i/32). +constexpr float kSqrt2Sin[32] = { + +0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f, + +1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f, + +1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f, + +0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f, + -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f, + -1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f, + -0.5411961f, -0.2758994f}; + +void GenerateComfortNoise(Aec3Optimization optimization, + const std::array& N2, + uint32_t* seed, + FftData* lower_band_noise, + FftData* upper_band_noise) { + FftData* N_low = lower_band_noise; + FftData* N_high = upper_band_noise; + + // Compute square root spectrum. + std::array N; + std::copy(N2.begin(), N2.end(), N.begin()); + aec3::VectorMath(optimization).Sqrt(N); + + // Compute the noise level for the upper bands. + constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1); + constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2; + const float high_band_noise_level = + std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) * + kOneByNumBands; + + // The analysis and synthesis windowing cause loss of power when + // cross-fading the noise where frames are completely uncorrelated + // (generated with random phase), hence the factor sqrt(2). + // This is not the case for the speech signal where the input is overlapping + // (strong correlation). + N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] = + N_high->re[kFftLengthBy2] = 0.f; + for (size_t k = 1; k < kFftLengthBy2; k++) { + constexpr int kIndexMask = 32 - 1; + // Generate a random 31-bit integer. + seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1); + // Convert to a 5-bit index. + int i = seed[0] >> 26; + + // y = sqrt(2) * sin(a) + const float x = kSqrt2Sin[i]; + // x = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2) + const float y = kSqrt2Sin[(i + 8) & kIndexMask]; + + // Form low-frequency noise via spectral shaping. + N_low->re[k] = N[k] * x; + N_low->im[k] = N[k] * y; + + // Form the high-frequency noise via simple levelling. 
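+      // (All upper-band bins share a single average magnitude computed from
+      // the upper half of N, rather than the per-bin shaping used above.)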
+    N_high->re[k] = high_band_noise_level * x;
+    N_high->im[k] = high_band_noise_level * y;
+  }
+}
+
+}  // namespace
+
+ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config,
+                                             Aec3Optimization optimization,
+                                             size_t num_capture_channels)
+    : optimization_(optimization),
+      seed_(42),
+      num_capture_channels_(num_capture_channels),
+      noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)),
+      N2_initial_(
+          std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
+              num_capture_channels_)),
+      Y2_smoothed_(num_capture_channels_),
+      N2_(num_capture_channels_) {
+  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+    (*N2_initial_)[ch].fill(0.f);
+    Y2_smoothed_[ch].fill(0.f);
+    N2_[ch].fill(1.0e6f);
+  }
+}
+
+ComfortNoiseGenerator::~ComfortNoiseGenerator() = default;
+
+void ComfortNoiseGenerator::Compute(
+    bool saturated_capture,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        capture_spectrum,
+    rtc::ArrayView<FftData> lower_band_noise,
+    rtc::ArrayView<FftData> upper_band_noise) {
+  const auto& Y2 = capture_spectrum;
+
+  if (!saturated_capture) {
+    // Smooth Y2.
+    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+      std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
+                     Y2[ch].begin(), Y2_smoothed_[ch].begin(),
+                     [](float a, float b) { return a + 0.1f * (b - a); });
+    }
+
+    if (N2_counter_ > 50) {
+      // Update N2 from Y2_smoothed.
+      for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+        std::transform(N2_[ch].begin(), N2_[ch].end(),
+                       Y2_smoothed_[ch].begin(), N2_[ch].begin(),
+                       [](float a, float b) {
+                         return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
+                                      : a * 1.0002f;
+                       });
+      }
+    }
+
+    if (N2_initial_) {
+      if (++N2_counter_ == 1000) {
+        N2_initial_.reset();
+      } else {
+        // Let N2_initial track N2: it rises slowly towards N2 but drops to it
+        // instantly.
+        for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+          std::transform(N2_[ch].begin(), N2_[ch].end(),
+                         (*N2_initial_)[ch].begin(),
+                         (*N2_initial_)[ch].begin(), [](float a, float b) {
+                           return a > b ? b + 0.001f * (a - b) : a;
+                         });
+        }
+      }
+    }
+
+    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+      for (auto& n : N2_[ch]) {
+        n = std::max(n, noise_floor_);
+      }
+      if (N2_initial_) {
+        for (auto& n : (*N2_initial_)[ch]) {
+          n = std::max(n, noise_floor_);
+        }
+      }
+    }
+  }
+
+  // Choose which N2 estimate to use.
+  const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;
+
+  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+    GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
+                         &upper_band_noise[ch]);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.h b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.h
new file mode 100644
index 0000000000..2785b765c5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_ + +#include + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "rtc_base/system/arch.h" + +namespace webrtc { +namespace aec3 { +#if defined(WEBRTC_ARCH_X86_FAMILY) + +void EstimateComfortNoise_SSE2(const std::array& N2, + uint32_t* seed, + FftData* lower_band_noise, + FftData* upper_band_noise); +#endif +void EstimateComfortNoise(const std::array& N2, + uint32_t* seed, + FftData* lower_band_noise, + FftData* upper_band_noise); + +} // namespace aec3 + +// Generates the comfort noise. +class ComfortNoiseGenerator { + public: + ComfortNoiseGenerator(const EchoCanceller3Config& config, + Aec3Optimization optimization, + size_t num_capture_channels); + ComfortNoiseGenerator() = delete; + ~ComfortNoiseGenerator(); + ComfortNoiseGenerator(const ComfortNoiseGenerator&) = delete; + + // Computes the comfort noise. + void Compute(bool saturated_capture, + rtc::ArrayView> + capture_spectrum, + rtc::ArrayView lower_band_noise, + rtc::ArrayView upper_band_noise); + + // Returns the estimate of the background noise spectrum. + rtc::ArrayView> NoiseSpectrum() + const { + return N2_; + } + + private: + const Aec3Optimization optimization_; + uint32_t seed_; + const size_t num_capture_channels_; + const float noise_floor_; + std::unique_ptr>> + N2_initial_; + std::vector> Y2_smoothed_; + std::vector> N2_; + int N2_counter_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc new file mode 100644 index 0000000000..a9da17559a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/comfort_noise_generator.h"
+
+#include <algorithm>
+#include <numeric>
+
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "rtc_base/random.h"
+#include "rtc_base/system/arch.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace aec3 {
+namespace {
+
+float Power(const FftData& N) {
+  std::array<float, kFftLengthBy2Plus1> N2;
+  N.Spectrum(Aec3Optimization::kNone, N2);
+  return std::accumulate(N2.begin(), N2.end(), 0.f) / N2.size();
+}
+
+}  // namespace
+
+TEST(ComfortNoiseGenerator, CorrectLevel) {
+  constexpr size_t kNumChannels = 5;
+  EchoCanceller3Config config;
+  ComfortNoiseGenerator cng(config, DetectOptimization(), kNumChannels);
+  AecState aec_state(config, kNumChannels);
+
+  std::vector<std::array<float, kFftLengthBy2Plus1>> N2(kNumChannels);
+  std::vector<FftData> n_lower(kNumChannels);
+  std::vector<FftData> n_upper(kNumChannels);
+
+  for (size_t ch = 0; ch < kNumChannels; ++ch) {
+    N2[ch].fill(1000.f * 1000.f / (ch + 1));
+    n_lower[ch].re.fill(0.f);
+    n_lower[ch].im.fill(0.f);
+    n_upper[ch].re.fill(0.f);
+    n_upper[ch].im.fill(0.f);
+  }
+
+  // Ensure an instantaneous update to nonzero noise.
+  cng.Compute(false, N2, n_lower, n_upper);
+
+  for (size_t ch = 0; ch < kNumChannels; ++ch) {
+    EXPECT_LT(0.f, Power(n_lower[ch]));
+    EXPECT_LT(0.f, Power(n_upper[ch]));
+  }
+
+  for (int k = 0; k < 10000; ++k) {
+    cng.Compute(false, N2, n_lower, n_upper);
+  }
+
+  for (size_t ch = 0; ch < kNumChannels; ++ch) {
+    EXPECT_NEAR(2.f * N2[ch][0], Power(n_lower[ch]), N2[ch][0] / 10.f);
+    EXPECT_NEAR(2.f * N2[ch][0], Power(n_upper[ch]), N2[ch][0] / 10.f);
+  }
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc
new file mode 100644
index 0000000000..c55344da79
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.cc
@@ -0,0 +1,71 @@
+
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/config_selector.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+// Validates that the mono and the multichannel configs have compatible fields.
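+// (These are fields whose behavior cannot change when the active config is
+// swapped at runtime, hence the RTC_DCHECK on them in the constructor.)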
+bool CompatibleConfigs(const EchoCanceller3Config& mono_config, + const EchoCanceller3Config& multichannel_config) { + if (mono_config.delay.fixed_capture_delay_samples != + multichannel_config.delay.fixed_capture_delay_samples) { + return false; + } + if (mono_config.filter.export_linear_aec_output != + multichannel_config.filter.export_linear_aec_output) { + return false; + } + if (mono_config.filter.high_pass_filter_echo_reference != + multichannel_config.filter.high_pass_filter_echo_reference) { + return false; + } + if (mono_config.multi_channel.detect_stereo_content != + multichannel_config.multi_channel.detect_stereo_content) { + return false; + } + if (mono_config.multi_channel.stereo_detection_timeout_threshold_seconds != + multichannel_config.multi_channel + .stereo_detection_timeout_threshold_seconds) { + return false; + } + return true; +} + +} // namespace + +ConfigSelector::ConfigSelector( + const EchoCanceller3Config& config, + const absl::optional& multichannel_config, + int num_render_input_channels) + : config_(config), multichannel_config_(multichannel_config) { + if (multichannel_config_.has_value()) { + RTC_DCHECK(CompatibleConfigs(config_, *multichannel_config_)); + } + + Update(!config_.multi_channel.detect_stereo_content && + num_render_input_channels > 1); + + RTC_DCHECK(active_config_); +} + +void ConfigSelector::Update(bool multichannel_content) { + if (multichannel_content && multichannel_config_.has_value()) { + active_config_ = &(*multichannel_config_); + } else { + active_config_ = &config_; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.h b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.h new file mode 100644 index 0000000000..3b3f94e5ac --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_ + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" + +namespace webrtc { + +// Selects the config to use. +class ConfigSelector { + public: + ConfigSelector( + const EchoCanceller3Config& config, + const absl::optional& multichannel_config, + int num_render_input_channels); + + // Updates the config selection based on the detection of multichannel + // content. 
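+  // Falls back to the mono config whenever no multichannel config was
+  // provided, regardless of the detected content.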
+ void Update(bool multichannel_content); + + const EchoCanceller3Config& active_config() const { return *active_config_; } + + private: + const EchoCanceller3Config config_; + const absl::optional multichannel_config_; + const EchoCanceller3Config* active_config_ = nullptr; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/config_selector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector_unittest.cc new file mode 100644 index 0000000000..1826bfcace --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/config_selector_unittest.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/config_selector.h" + +#include + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" +#include "test/gtest.h" + +namespace webrtc { + +class ConfigSelectorChannelsAndContentDetection + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters, + ConfigSelectorChannelsAndContentDetection, + ::testing::Combine(::testing::Values(1, 2, 8), + ::testing::Values(false, true))); + +class ConfigSelectorChannels : public ::testing::Test, + public ::testing::WithParamInterface {}; + +INSTANTIATE_TEST_SUITE_P(ConfigSelectorMultiParameters, + ConfigSelectorChannels, + ::testing::Values(1, 2, 8)); + +TEST_P(ConfigSelectorChannelsAndContentDetection, + MonoConfigIsSelectedWhenNoMultiChannelConfigPresent) { + const auto [num_channels, detect_stereo_content] = GetParam(); + EchoCanceller3Config config; + config.multi_channel.detect_stereo_content = detect_stereo_content; + absl::optional multichannel_config; + + config.delay.default_delay = config.delay.default_delay + 1; + const size_t custom_delay_value_in_config = config.delay.default_delay; + + ConfigSelector cs(config, multichannel_config, + /*num_render_input_channels=*/num_channels); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + + cs.Update(/*multichannel_content=*/false); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + + cs.Update(/*multichannel_content=*/true); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); +} + +TEST_P(ConfigSelectorChannelsAndContentDetection, + CorrectInitialConfigIsSelected) { + const auto [num_channels, detect_stereo_content] = GetParam(); + EchoCanceller3Config config; + config.multi_channel.detect_stereo_content = detect_stereo_content; + absl::optional multichannel_config = config; + + config.delay.default_delay += 1; + const size_t custom_delay_value_in_config = config.delay.default_delay; + multichannel_config->delay.default_delay += 2; + const size_t custom_delay_value_in_multichannel_config = + multichannel_config->delay.default_delay; + + ConfigSelector cs(config, multichannel_config, + /*num_render_input_channels=*/num_channels); + + if (num_channels == 1 || detect_stereo_content) { + EXPECT_EQ(cs.active_config().delay.default_delay, + 
custom_delay_value_in_config); + } else { + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_multichannel_config); + } +} + +TEST_P(ConfigSelectorChannels, CorrectConfigUpdateBehavior) { + const int num_channels = GetParam(); + EchoCanceller3Config config; + config.multi_channel.detect_stereo_content = true; + absl::optional multichannel_config = config; + + config.delay.default_delay += 1; + const size_t custom_delay_value_in_config = config.delay.default_delay; + multichannel_config->delay.default_delay += 2; + const size_t custom_delay_value_in_multichannel_config = + multichannel_config->delay.default_delay; + + ConfigSelector cs(config, multichannel_config, + /*num_render_input_channels=*/num_channels); + + cs.Update(/*multichannel_content=*/false); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + + if (num_channels == 1) { + cs.Update(/*multichannel_content=*/false); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_config); + } else { + cs.Update(/*multichannel_content=*/true); + EXPECT_EQ(cs.active_config().delay.default_delay, + custom_delay_value_in_multichannel_config); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc new file mode 100644 index 0000000000..bd03237ca0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/decimator.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/aec3/decimator.h" + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// signal.butter(2, 3400/8000.0, 'lowpass', analog=False) +const std::vector GetLowPassFilterDS2() { + return std::vector{ + {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}, + {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}, + {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}}; +} + +// signal.ellip(6, 1, 40, 1800/8000, btype='lowpass', analog=False) +const std::vector GetLowPassFilterDS4() { + return std::vector{ + {{-0.08873842f, 0.99605496f}, {0.75916227f, 0.23841065f}, 0.26250696827f}, + {{0.62273832f, 0.78243018f}, {0.74892112f, 0.5410152f}, 0.26250696827f}, + {{0.71107693f, 0.70311421f}, {0.74895534f, 0.63924616f}, 0.26250696827f}}; +} + +// signal.cheby1(1, 6, [1000/8000, 2000/8000], btype='bandpass', analog=False) +const std::vector GetBandPassFilterDS8() { + return std::vector{ + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}, + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}, + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}, + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}, + {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}}; +} + +// signal.butter(2, 1000/8000.0, 'highpass', analog=False) +const std::vector GetHighPassFilter() { + return std::vector{ + {{1.f, 0.f}, {0.72712179f, 0.21296904f}, 0.7570763753338849f}}; +} + +const std::vector GetPassThroughFilter() { + return std::vector{}; +} +} // namespace + +Decimator::Decimator(size_t down_sampling_factor) + : down_sampling_factor_(down_sampling_factor), + anti_aliasing_filter_(down_sampling_factor_ == 4 + ? GetLowPassFilterDS4() + : (down_sampling_factor_ == 8 + ? GetBandPassFilterDS8() + : GetLowPassFilterDS2())), + noise_reduction_filter_(down_sampling_factor_ == 8 + ? GetPassThroughFilter() + : GetHighPassFilter()) { + RTC_DCHECK(down_sampling_factor_ == 2 || down_sampling_factor_ == 4 || + down_sampling_factor_ == 8); +} + +void Decimator::Decimate(rtc::ArrayView in, + rtc::ArrayView out) { + RTC_DCHECK_EQ(kBlockSize, in.size()); + RTC_DCHECK_EQ(kBlockSize / down_sampling_factor_, out.size()); + std::array x; + + // Limit the frequency content of the signal to avoid aliasing. + anti_aliasing_filter_.Process(in, x); + + // Reduce the impact of near-end noise. + noise_reduction_filter_.Process(x); + + // Downsample the signal. + for (size_t j = 0, k = 0; j < out.size(); ++j, k += down_sampling_factor_) { + RTC_DCHECK_GT(kBlockSize, k); + out[j] = x[k]; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/decimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/decimator.h new file mode 100644 index 0000000000..dbff3d9fff --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/decimator.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/utility/cascaded_biquad_filter.h" + +namespace webrtc { + +// Provides functionality for decimating a signal. +class Decimator { + public: + explicit Decimator(size_t down_sampling_factor); + + Decimator(const Decimator&) = delete; + Decimator& operator=(const Decimator&) = delete; + + // Downsamples the signal. + void Decimate(rtc::ArrayView in, rtc::ArrayView out); + + private: + const size_t down_sampling_factor_; + CascadedBiQuadFilter anti_aliasing_filter_; + CascadedBiQuadFilter noise_reduction_filter_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/decimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/decimator_unittest.cc new file mode 100644 index 0000000000..e6f5ea0403 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/decimator_unittest.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/decimator.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +std::string ProduceDebugText(int sample_rate_hz) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.Release(); +} + +constexpr size_t kDownSamplingFactors[] = {2, 4, 8}; +constexpr float kPi = 3.141592f; +constexpr size_t kNumStartupBlocks = 50; +constexpr size_t kNumBlocks = 1000; + +void ProduceDecimatedSinusoidalOutputPower(int sample_rate_hz, + size_t down_sampling_factor, + float sinusoidal_frequency_hz, + float* input_power, + float* output_power) { + float input[kBlockSize * kNumBlocks]; + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + // Produce a sinusoid of the specified frequency. 
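+  // (Generated at full scale, amplitude 32767, so that any aliasing leakage
+  // is measured against a strong input.)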
+ for (size_t k = 0; k < kBlockSize * kNumBlocks; ++k) { + input[k] = 32767.f * std::sin(2.f * kPi * sinusoidal_frequency_hz * k / + sample_rate_hz); + } + + Decimator decimator(down_sampling_factor); + std::vector output(sub_block_size * kNumBlocks); + + for (size_t k = 0; k < kNumBlocks; ++k) { + std::vector sub_block(sub_block_size); + decimator.Decimate( + rtc::ArrayView(&input[k * kBlockSize], kBlockSize), + sub_block); + + std::copy(sub_block.begin(), sub_block.end(), + output.begin() + k * sub_block_size); + } + + ASSERT_GT(kNumBlocks, kNumStartupBlocks); + rtc::ArrayView input_to_evaluate( + &input[kNumStartupBlocks * kBlockSize], + (kNumBlocks - kNumStartupBlocks) * kBlockSize); + rtc::ArrayView output_to_evaluate( + &output[kNumStartupBlocks * sub_block_size], + (kNumBlocks - kNumStartupBlocks) * sub_block_size); + *input_power = + std::inner_product(input_to_evaluate.begin(), input_to_evaluate.end(), + input_to_evaluate.begin(), 0.f) / + input_to_evaluate.size(); + *output_power = + std::inner_product(output_to_evaluate.begin(), output_to_evaluate.end(), + output_to_evaluate.begin(), 0.f) / + output_to_evaluate.size(); +} + +} // namespace + +// Verifies that there is little aliasing from upper frequencies in the +// downsampling. +TEST(Decimator, NoLeakageFromUpperFrequencies) { + float input_power; + float output_power; + for (auto rate : {16000, 32000, 48000}) { + for (auto down_sampling_factor : kDownSamplingFactors) { + ProduceDebugText(rate); + ProduceDecimatedSinusoidalOutputPower(rate, down_sampling_factor, + 3.f / 8.f * rate, &input_power, + &output_power); + EXPECT_GT(0.0001f * input_power, output_power); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies the check for the input size. +TEST(DecimatorDeathTest, WrongInputSize) { + Decimator decimator(4); + std::vector x(kBlockSize - 1, 0.f); + std::array x_downsampled; + EXPECT_DEATH(decimator.Decimate(x, x_downsampled), ""); +} + +// Verifies the check for non-null output parameter. +TEST(DecimatorDeathTest, NullOutput) { + Decimator decimator(4); + std::vector x(kBlockSize, 0.f); + EXPECT_DEATH(decimator.Decimate(x, nullptr), ""); +} + +// Verifies the check for the output size. +TEST(DecimatorDeathTest, WrongOutputSize) { + Decimator decimator(4); + std::vector x(kBlockSize, 0.f); + std::array x_downsampled; + EXPECT_DEATH(decimator.Decimate(x, x_downsampled), ""); +} + +// Verifies the check for the correct downsampling factor. +TEST(DecimatorDeathTest, CorrectDownSamplingFactor) { + EXPECT_DEATH(Decimator(3), ""); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/delay_estimate.h b/third_party/libwebrtc/modules/audio_processing/aec3/delay_estimate.h new file mode 100644 index 0000000000..7838a0c255 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/delay_estimate.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_ + +#include + +namespace webrtc { + +// Stores delay_estimates. 
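+// Each estimate carries a quality tag (coarse or refined) and counters for
+// how many blocks it has remained unchanged and gone without updates.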
+struct DelayEstimate { + enum class Quality { kCoarse, kRefined }; + + DelayEstimate(Quality quality, size_t delay) + : quality(quality), delay(delay) {} + + Quality quality; + size_t delay; + size_t blocks_since_last_change = 0; + size_t blocks_since_last_update = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc new file mode 100644 index 0000000000..40073cf615 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/dominant_nearend_detector.h" + +#include + +namespace webrtc { +DominantNearendDetector::DominantNearendDetector( + const EchoCanceller3Config::Suppressor::DominantNearendDetection& config, + size_t num_capture_channels) + : enr_threshold_(config.enr_threshold), + enr_exit_threshold_(config.enr_exit_threshold), + snr_threshold_(config.snr_threshold), + hold_duration_(config.hold_duration), + trigger_threshold_(config.trigger_threshold), + use_during_initial_phase_(config.use_during_initial_phase), + num_capture_channels_(num_capture_channels), + trigger_counters_(num_capture_channels_), + hold_counters_(num_capture_channels_) {} + +void DominantNearendDetector::Update( + rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + bool initial_state) { + nearend_state_ = false; + + auto low_frequency_energy = [](rtc::ArrayView spectrum) { + RTC_DCHECK_LE(16, spectrum.size()); + return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f); + }; + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + const float ne_sum = low_frequency_energy(nearend_spectrum[ch]); + const float echo_sum = low_frequency_energy(residual_echo_spectrum[ch]); + const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]); + + // Detect strong active nearend if the nearend is sufficiently stronger than + // the echo and the nearend noise. + if ((!initial_state || use_during_initial_phase_) && + echo_sum < enr_threshold_ * ne_sum && + ne_sum > snr_threshold_ * noise_sum) { + if (++trigger_counters_[ch] >= trigger_threshold_) { + // After a period of strong active nearend activity, flag nearend mode. + hold_counters_[ch] = hold_duration_; + trigger_counters_[ch] = trigger_threshold_; + } + } else { + // Forget previously detected strong active nearend activity. + trigger_counters_[ch] = std::max(0, trigger_counters_[ch] - 1); + } + + // Exit nearend-state early at strong echo. + if (echo_sum > enr_exit_threshold_ * ne_sum && + echo_sum > snr_threshold_ * noise_sum) { + hold_counters_[ch] = 0; + } + + // Remain in any nearend mode for a certain duration. 
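+    // (hold_counters_[ch] is armed to hold_duration_ when nearend activity
+    // triggers and decays by one per update; the nearend state stays active
+    // while any channel counter is positive.)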
+ hold_counters_[ch] = std::max(0, hold_counters_[ch] - 1); + nearend_state_ = nearend_state_ || hold_counters_[ch] > 0; + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.h b/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.h new file mode 100644 index 0000000000..046d1488d6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/dominant_nearend_detector.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_ + +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/nearend_detector.h" + +namespace webrtc { +// Class for selecting whether the suppressor is in the nearend or echo state. +class DominantNearendDetector : public NearendDetector { + public: + DominantNearendDetector( + const EchoCanceller3Config::Suppressor::DominantNearendDetection& config, + size_t num_capture_channels); + + // Returns whether the current state is the nearend state. + bool IsNearendState() const override { return nearend_state_; } + + // Updates the state selection based on latest spectral estimates. + void Update(rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + bool initial_state) override; + + private: + const float enr_threshold_; + const float enr_exit_threshold_; + const float snr_threshold_; + const int hold_duration_; + const int trigger_threshold_; + const bool use_during_initial_phase_; + const size_t num_capture_channels_; + + bool nearend_state_ = false; + std::vector trigger_counters_; + std::vector hold_counters_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc new file mode 100644 index 0000000000..c105911aa8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" + +#include + +namespace webrtc { + +DownsampledRenderBuffer::DownsampledRenderBuffer(size_t downsampled_buffer_size) + : size(static_cast(downsampled_buffer_size)), + buffer(downsampled_buffer_size, 0.f) { + std::fill(buffer.begin(), buffer.end(), 0.f); +} + +DownsampledRenderBuffer::~DownsampledRenderBuffer() = default; + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.h new file mode 100644 index 0000000000..fbdc9b4e93 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/downsampled_render_buffer.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_ + +#include + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +// Holds the circular buffer of the downsampled render data. +struct DownsampledRenderBuffer { + explicit DownsampledRenderBuffer(size_t downsampled_buffer_size); + ~DownsampledRenderBuffer(); + + int IncIndex(int index) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return index < size - 1 ? index + 1 : 0; + } + + int DecIndex(int index) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return index > 0 ? index - 1 : size - 1; + } + + int OffsetIndex(int index, int offset) const { + RTC_DCHECK_GE(buffer.size(), offset); + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return (size + index + offset) % size; + } + + void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); } + void IncWriteIndex() { write = IncIndex(write); } + void DecWriteIndex() { write = DecIndex(write); } + void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); } + void IncReadIndex() { read = IncIndex(read); } + void DecReadIndex() { read = DecIndex(read); } + + const int size; + std::vector buffer; + int write = 0; + int read = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc new file mode 100644 index 0000000000..142a33d5e0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.cc @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/echo_audibility.h" + +#include +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/block_buffer.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "modules/audio_processing/aec3/stationarity_estimator.h" + +namespace webrtc { + +EchoAudibility::EchoAudibility(bool use_render_stationarity_at_init) + : use_render_stationarity_at_init_(use_render_stationarity_at_init) { + Reset(); +} + +EchoAudibility::~EchoAudibility() = default; + +void EchoAudibility::Update(const RenderBuffer& render_buffer, + rtc::ArrayView average_reverb, + int delay_blocks, + bool external_delay_seen) { + UpdateRenderNoiseEstimator(render_buffer.GetSpectrumBuffer(), + render_buffer.GetBlockBuffer(), + external_delay_seen); + + if (external_delay_seen || use_render_stationarity_at_init_) { + UpdateRenderStationarityFlags(render_buffer, average_reverb, delay_blocks); + } +} + +void EchoAudibility::Reset() { + render_stationarity_.Reset(); + non_zero_render_seen_ = false; + render_spectrum_write_prev_ = absl::nullopt; +} + +void EchoAudibility::UpdateRenderStationarityFlags( + const RenderBuffer& render_buffer, + rtc::ArrayView average_reverb, + int min_channel_delay_blocks) { + const SpectrumBuffer& spectrum_buffer = render_buffer.GetSpectrumBuffer(); + int idx_at_delay = spectrum_buffer.OffsetIndex(spectrum_buffer.read, + min_channel_delay_blocks); + + int num_lookahead = render_buffer.Headroom() - min_channel_delay_blocks + 1; + num_lookahead = std::max(0, num_lookahead); + + render_stationarity_.UpdateStationarityFlags(spectrum_buffer, average_reverb, + idx_at_delay, num_lookahead); +} + +void EchoAudibility::UpdateRenderNoiseEstimator( + const SpectrumBuffer& spectrum_buffer, + const BlockBuffer& block_buffer, + bool external_delay_seen) { + if (!render_spectrum_write_prev_) { + render_spectrum_write_prev_ = spectrum_buffer.write; + render_block_write_prev_ = block_buffer.write; + return; + } + int render_spectrum_write_current = spectrum_buffer.write; + if (!non_zero_render_seen_ && !external_delay_seen) { + non_zero_render_seen_ = !IsRenderTooLow(block_buffer); + } + if (non_zero_render_seen_) { + for (int idx = render_spectrum_write_prev_.value(); + idx != render_spectrum_write_current; + idx = spectrum_buffer.DecIndex(idx)) { + render_stationarity_.UpdateNoiseEstimator(spectrum_buffer.buffer[idx]); + } + } + render_spectrum_write_prev_ = render_spectrum_write_current; +} + +bool EchoAudibility::IsRenderTooLow(const BlockBuffer& block_buffer) { + const int num_render_channels = + static_cast(block_buffer.buffer[0].NumChannels()); + bool too_low = false; + const int render_block_write_current = block_buffer.write; + if (render_block_write_current == render_block_write_prev_) { + too_low = true; + } else { + for (int idx = render_block_write_prev_; idx != render_block_write_current; + idx = block_buffer.IncIndex(idx)) { + float max_abs_over_channels = 0.f; + for (int ch = 0; ch < num_render_channels; ++ch) { + rtc::ArrayView block = + block_buffer.buffer[idx].View(/*band=*/0, /*channel=*/ch); + auto r = std::minmax_element(block.cbegin(), block.cend()); + float max_abs_channel = + std::max(std::fabs(*r.first), std::fabs(*r.second)); + max_abs_over_channels = + std::max(max_abs_over_channels, max_abs_channel); + } + if (max_abs_over_channels < 10.f) { + too_low = true; // Discards all blocks if one of them is too low. 
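+        // (A single low block marks the entire span since the previous write
+        // index as too low, so the loop can exit early.)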
+ break; + } + } + } + render_block_write_prev_ = render_block_write_current; + return too_low; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.h new file mode 100644 index 0000000000..b9d6f87d2a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_audibility.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_ + +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/block_buffer.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "modules/audio_processing/aec3/stationarity_estimator.h" + +namespace webrtc { + +class EchoAudibility { + public: + explicit EchoAudibility(bool use_render_stationarity_at_init); + ~EchoAudibility(); + + EchoAudibility(const EchoAudibility&) = delete; + EchoAudibility& operator=(const EchoAudibility&) = delete; + + // Feed new render data to the echo audibility estimator. + void Update(const RenderBuffer& render_buffer, + rtc::ArrayView average_reverb, + int min_channel_delay_blocks, + bool external_delay_seen); + // Get the residual echo scaling. + void GetResidualEchoScaling(bool filter_has_had_time_to_converge, + rtc::ArrayView residual_scaling) const { + for (size_t band = 0; band < residual_scaling.size(); ++band) { + if (render_stationarity_.IsBandStationary(band) && + (filter_has_had_time_to_converge || + use_render_stationarity_at_init_)) { + residual_scaling[band] = 0.f; + } else { + residual_scaling[band] = 1.0f; + } + } + } + + // Returns true if the current render block is estimated as stationary. + bool IsBlockStationary() const { + return render_stationarity_.IsBlockStationary(); + } + + private: + // Reset the EchoAudibility class. + void Reset(); + + // Updates the render stationarity flags for the current frame. + void UpdateRenderStationarityFlags(const RenderBuffer& render_buffer, + rtc::ArrayView average_reverb, + int delay_blocks); + + // Updates the noise estimator with the new render data since the previous + // call to this method. + void UpdateRenderNoiseEstimator(const SpectrumBuffer& spectrum_buffer, + const BlockBuffer& block_buffer, + bool external_delay_seen); + + // Returns a bool being true if the render signal contains just close to zero + // values. 
+ bool IsRenderTooLow(const BlockBuffer& block_buffer); + + absl::optional render_spectrum_write_prev_; + int render_block_write_prev_; + bool non_zero_render_seen_; + const bool use_render_stationarity_at_init_; + StationarityEstimator render_stationarity_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc new file mode 100644 index 0000000000..e8e2175994 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc @@ -0,0 +1,992 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/echo_canceller3.h" + +#include +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/high_pass_filter.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +enum class EchoCanceller3ApiCall { kCapture, kRender }; + +bool DetectSaturation(rtc::ArrayView y) { + for (size_t k = 0; k < y.size(); ++k) { + if (y[k] >= 32700.0f || y[k] <= -32700.0f) { + return true; + } + } + return false; +} + +// Retrieves a value from a field trial if it is available. If no value is +// present, the default value is returned. If the retrieved value is beyond the +// specified limits, the default value is returned instead. 
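+// For example, with a hypothetical trial string "0.3" registered under
+// trial_name, RetrieveFieldTrialValue(trial_name, 0.f, 1.f, &v) sets v to
+// 0.3f, whereas limits of (0.5f, 1.f) would leave v untouched.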
+void RetrieveFieldTrialValue(absl::string_view trial_name, + float min, + float max, + float* value_to_update) { + const std::string field_trial_str = field_trial::FindFullName(trial_name); + + FieldTrialParameter field_trial_param(/*key=*/"", *value_to_update); + + ParseFieldTrial({&field_trial_param}, field_trial_str); + float field_trial_value = static_cast(field_trial_param.Get()); + + if (field_trial_value >= min && field_trial_value <= max && + field_trial_value != *value_to_update) { + RTC_LOG(LS_INFO) << "Key " << trial_name + << " changing AEC3 parameter value from " + << *value_to_update << " to " << field_trial_value; + *value_to_update = field_trial_value; + } +} + +void RetrieveFieldTrialValue(absl::string_view trial_name, + int min, + int max, + int* value_to_update) { + const std::string field_trial_str = field_trial::FindFullName(trial_name); + + FieldTrialParameter field_trial_param(/*key=*/"", *value_to_update); + + ParseFieldTrial({&field_trial_param}, field_trial_str); + float field_trial_value = field_trial_param.Get(); + + if (field_trial_value >= min && field_trial_value <= max && + field_trial_value != *value_to_update) { + RTC_LOG(LS_INFO) << "Key " << trial_name + << " changing AEC3 parameter value from " + << *value_to_update << " to " << field_trial_value; + *value_to_update = field_trial_value; + } +} + +void FillSubFrameView( + AudioBuffer* frame, + size_t sub_frame_index, + std::vector>>* sub_frame_view) { + RTC_DCHECK_GE(1, sub_frame_index); + RTC_DCHECK_LE(0, sub_frame_index); + RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size()); + RTC_DCHECK_EQ(frame->num_channels(), (*sub_frame_view)[0].size()); + for (size_t band = 0; band < sub_frame_view->size(); ++band) { + for (size_t channel = 0; channel < (*sub_frame_view)[0].size(); ++channel) { + (*sub_frame_view)[band][channel] = rtc::ArrayView( + &frame->split_bands(channel)[band][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } +} + +void FillSubFrameView( + bool proper_downmix_needed, + std::vector>>* frame, + size_t sub_frame_index, + std::vector>>* sub_frame_view) { + RTC_DCHECK_GE(1, sub_frame_index); + RTC_DCHECK_EQ(frame->size(), sub_frame_view->size()); + const size_t frame_num_channels = (*frame)[0].size(); + const size_t sub_frame_num_channels = (*sub_frame_view)[0].size(); + if (frame_num_channels > sub_frame_num_channels) { + RTC_DCHECK_EQ(sub_frame_num_channels, 1u); + if (proper_downmix_needed) { + // When a proper downmix is needed (which is the case when proper stereo + // is present in the echo reference signal but the echo canceller does the + // processing in mono) downmix the echo reference by averaging the channel + // content (otherwise downmixing is done by selecting channel 0). 
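+      // The average is accumulated in place in channel 0 and then scaled by
+      // 1 / frame_num_channels.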
+ for (size_t band = 0; band < frame->size(); ++band) { + for (size_t ch = 1; ch < frame_num_channels; ++ch) { + for (size_t k = 0; k < kSubFrameLength; ++k) { + (*frame)[band][/*channel=*/0] + [sub_frame_index * kSubFrameLength + k] += + (*frame)[band][ch][sub_frame_index * kSubFrameLength + k]; + } + } + const float one_by_num_channels = 1.0f / frame_num_channels; + for (size_t k = 0; k < kSubFrameLength; ++k) { + (*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength + + k] *= one_by_num_channels; + } + } + } + for (size_t band = 0; band < frame->size(); ++band) { + (*sub_frame_view)[band][/*channel=*/0] = rtc::ArrayView( + &(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } else { + RTC_DCHECK_EQ(frame_num_channels, sub_frame_num_channels); + for (size_t band = 0; band < frame->size(); ++band) { + for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) { + (*sub_frame_view)[band][channel] = rtc::ArrayView( + &(*frame)[band][channel][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } + } +} + +void ProcessCaptureFrameContent( + AudioBuffer* linear_output, + AudioBuffer* capture, + bool level_change, + bool aec_reference_is_downmixed_stereo, + bool saturated_microphone_signal, + size_t sub_frame_index, + FrameBlocker* capture_blocker, + BlockFramer* linear_output_framer, + BlockFramer* output_framer, + BlockProcessor* block_processor, + Block* linear_output_block, + std::vector>>* + linear_output_sub_frame_view, + Block* capture_block, + std::vector>>* capture_sub_frame_view) { + FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view); + + if (linear_output) { + RTC_DCHECK(linear_output_framer); + RTC_DCHECK(linear_output_block); + RTC_DCHECK(linear_output_sub_frame_view); + FillSubFrameView(linear_output, sub_frame_index, + linear_output_sub_frame_view); + } + + capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view, + capture_block); + block_processor->ProcessCapture( + /*echo_path_gain_change=*/level_change || + aec_reference_is_downmixed_stereo, + saturated_microphone_signal, linear_output_block, capture_block); + output_framer->InsertBlockAndExtractSubFrame(*capture_block, + capture_sub_frame_view); + + if (linear_output) { + RTC_DCHECK(linear_output_framer); + linear_output_framer->InsertBlockAndExtractSubFrame( + *linear_output_block, linear_output_sub_frame_view); + } +} + +void ProcessRemainingCaptureFrameContent(bool level_change, + bool aec_reference_is_downmixed_stereo, + bool saturated_microphone_signal, + FrameBlocker* capture_blocker, + BlockFramer* linear_output_framer, + BlockFramer* output_framer, + BlockProcessor* block_processor, + Block* linear_output_block, + Block* block) { + if (!capture_blocker->IsBlockAvailable()) { + return; + } + + capture_blocker->ExtractBlock(block); + block_processor->ProcessCapture( + /*echo_path_gain_change=*/level_change || + aec_reference_is_downmixed_stereo, + saturated_microphone_signal, linear_output_block, block); + output_framer->InsertBlock(*block); + + if (linear_output_framer) { + RTC_DCHECK(linear_output_block); + linear_output_framer->InsertBlock(*linear_output_block); + } +} + +void BufferRenderFrameContent( + bool proper_downmix_needed, + std::vector>>* render_frame, + size_t sub_frame_index, + FrameBlocker* render_blocker, + BlockProcessor* block_processor, + Block* block, + std::vector>>* sub_frame_view) { + FillSubFrameView(proper_downmix_needed, render_frame, sub_frame_index, + sub_frame_view); + 
render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block);
+  block_processor->BufferRender(*block);
+}
+
+void BufferRemainingRenderFrameContent(FrameBlocker* render_blocker,
+                                       BlockProcessor* block_processor,
+                                       Block* block) {
+  if (!render_blocker->IsBlockAvailable()) {
+    return;
+  }
+  render_blocker->ExtractBlock(block);
+  block_processor->BufferRender(*block);
+}
+
+void CopyBufferIntoFrame(const AudioBuffer& buffer,
+                         size_t num_bands,
+                         size_t num_channels,
+                         std::vector<std::vector<std::vector<float>>>* frame) {
+  RTC_DCHECK_EQ(num_bands, frame->size());
+  RTC_DCHECK_EQ(num_channels, (*frame)[0].size());
+  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, (*frame)[0][0].size());
+  for (size_t band = 0; band < num_bands; ++band) {
+    for (size_t channel = 0; channel < num_channels; ++channel) {
+      rtc::ArrayView<const float> buffer_view(
+          &buffer.split_bands_const(channel)[band][0],
+          AudioBuffer::kSplitBandSize);
+      std::copy(buffer_view.begin(), buffer_view.end(),
+                (*frame)[band][channel].begin());
+    }
+  }
+}
+
+}  // namespace
+
+// TODO(webrtc:5298): Move this to a separate file.
+EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
+  EchoCanceller3Config adjusted_cfg = config;
+
+  if (field_trial::IsEnabled("WebRTC-Aec3StereoContentDetectionKillSwitch")) {
+    adjusted_cfg.multi_channel.detect_stereo_content = false;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) {
+    adjusted_cfg.suppressor.high_bands_suppression
+        .anti_howling_activation_threshold = 25.f;
+    adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 0.01f;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3UseShortConfigChangeDuration")) {
+    adjusted_cfg.filter.config_change_duration_blocks = 10;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3UseZeroInitialStateDuration")) {
+    adjusted_cfg.filter.initial_state_seconds = 0.f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3UseDot1SecondsInitialStateDuration")) {
+    adjusted_cfg.filter.initial_state_seconds = .1f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3UseDot2SecondsInitialStateDuration")) {
+    adjusted_cfg.filter.initial_state_seconds = .2f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3UseDot3SecondsInitialStateDuration")) {
+    adjusted_cfg.filter.initial_state_seconds = .3f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3UseDot6SecondsInitialStateDuration")) {
+    adjusted_cfg.filter.initial_state_seconds = .6f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3UseDot9SecondsInitialStateDuration")) {
+    adjusted_cfg.filter.initial_state_seconds = .9f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3Use1Dot2SecondsInitialStateDuration")) {
+    adjusted_cfg.filter.initial_state_seconds = 1.2f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3Use1Dot6SecondsInitialStateDuration")) {
+    adjusted_cfg.filter.initial_state_seconds = 1.6f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3Use2Dot0SecondsInitialStateDuration")) {
+    adjusted_cfg.filter.initial_state_seconds = 2.0f;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3HighPassFilterEchoReference")) {
+    adjusted_cfg.filter.high_pass_filter_echo_reference = true;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) {
+    adjusted_cfg.ep_strength.echo_can_saturate = false;
+  }
+
+  const std::string use_nearend_reverb_len_tunings =
+      field_trial::FindFullName("WebRTC-Aec3UseNearendReverbLen");
+  FieldTrialParameter<double> nearend_reverb_default_len(
+      "default_len", adjusted_cfg.ep_strength.default_len);
+  FieldTrialParameter<double> nearend_reverb_nearend_len(
+      "nearend_len", adjusted_cfg.ep_strength.nearend_len);
+
+  ParseFieldTrial({&nearend_reverb_default_len, &nearend_reverb_nearend_len},
+                  use_nearend_reverb_len_tunings);
+  float default_len = static_cast<float>(nearend_reverb_default_len.Get());
+  float nearend_len = static_cast<float>(nearend_reverb_nearend_len.Get());
+  if (default_len > -1 && default_len < 1 && nearend_len > -1 &&
+      nearend_len < 1) {
+    adjusted_cfg.ep_strength.default_len =
+        static_cast<float>(nearend_reverb_default_len.Get());
+    adjusted_cfg.ep_strength.nearend_len =
+        static_cast<float>(nearend_reverb_nearend_len.Get());
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3ConservativeTailFreqResponse")) {
+    adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = true;
+  }
+
+  if (field_trial::IsDisabled("WebRTC-Aec3ConservativeTailFreqResponse")) {
+    adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = false;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
+    // Two blocks headroom.
+    adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToZeroKillSwitch")) {
+    adjusted_cfg.erle.clamp_quality_estimate_to_zero = false;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToOneKillSwitch")) {
+    adjusted_cfg.erle.clamp_quality_estimate_to_one = false;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3OnsetDetectionKillSwitch")) {
+    adjusted_cfg.erle.onset_detection = false;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceRenderDelayEstimationDownmixing")) {
+    adjusted_cfg.delay.render_alignment_mixing.downmix = true;
+    adjusted_cfg.delay.render_alignment_mixing.adaptive_selection = false;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceCaptureDelayEstimationDownmixing")) {
+    adjusted_cfg.delay.capture_alignment_mixing.downmix = true;
+    adjusted_cfg.delay.capture_alignment_mixing.adaptive_selection = false;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceCaptureDelayEstimationLeftRightPrioritization")) {
+    adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
+        true;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-"
+          "Aec3RenderDelayEstimationLeftRightPrioritizationKillSwitch")) {
+    adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
+        false;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3DelayEstimatorDetectPreEcho")) {
+    adjusted_cfg.delay.detect_pre_echo = true;
+  }
+
+  if (field_trial::IsDisabled("WebRTC-Aec3DelayEstimatorDetectPreEcho")) {
+    adjusted_cfg.delay.detect_pre_echo = false;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) {
+    adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3VerySensitiveDominantNearendActivation")) {
+    adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.75f;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3TransparentAntiHowlingGain")) {
+    adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 1.f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorTuning")) {
+    adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = 0.4f;
+    adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = 0.5f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorTuning")) {
+    adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = 1.29f;
+    adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = 1.3f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorHfTuning")) {
+    adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = 0.3f;
+    adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = 0.4f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorHfTuning")) {
+    adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = 1.09f;
+    adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = 1.1f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceRapidlyAdjustingNormalSuppressorTunings")) {
+    adjusted_cfg.suppressor.normal_tuning.max_inc_factor = 2.5f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceRapidlyAdjustingNearendSuppressorTunings")) {
+    adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = 2.5f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceSlowlyAdjustingNormalSuppressorTunings")) {
+    adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = .2f;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceSlowlyAdjustingNearendSuppressorTunings")) {
+    adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3EnforceConservativeHfSuppression")) {
+    adjusted_cfg.suppressor.conservative_hf_suppression = true;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) {
+    adjusted_cfg.echo_audibility.use_stationarity_properties = true;
+  }
+
+  if (field_trial::IsEnabled(
+          "WebRTC-Aec3EnforceStationarityPropertiesAtInit")) {
+    adjusted_cfg.echo_audibility.use_stationarity_properties_at_init = true;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3EnforceLowActiveRenderLimit")) {
+    adjusted_cfg.render_levels.active_render_limit = 50.f;
+  } else if (field_trial::IsEnabled(
+                 "WebRTC-Aec3EnforceVeryLowActiveRenderLimit")) {
+    adjusted_cfg.render_levels.active_render_limit = 30.f;
+  }
+
+  if (field_trial::IsEnabled("WebRTC-Aec3NonlinearModeReverbKillSwitch")) {
+    adjusted_cfg.echo_model.model_reverb_in_nonlinear_mode = false;
+  }
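The overrides above all follow one kill-switch pattern: a named trial either flips a boolean or overwrites a tuned constant, and an absent trial leaves the config untouched. A minimal sketch of that pattern, using the real IsEnabled() helper but a made-up trial name:

  // Illustrative only; "WebRTC-Aec3ExampleKillSwitch" is a hypothetical name.
  #include "api/audio/echo_canceller3_config.h"
  #include "system_wrappers/include/field_trial.h"

  void ApplyExampleKillSwitch(webrtc::EchoCanceller3Config* cfg) {
    // IsEnabled() returns true when the process-wide trial string contains
    // "WebRTC-Aec3ExampleKillSwitch/Enabled/".
    if (webrtc::field_trial::IsEnabled("WebRTC-Aec3ExampleKillSwitch")) {
      cfg->erle.onset_detection = false;  // Example: switch one feature off.
    }
  }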
+
+  // Field-trial based override for the whole suppressor tuning.
+  const std::string suppressor_tuning_override_trial_name =
+      field_trial::FindFullName("WebRTC-Aec3SuppressorTuningOverride");
+
+  FieldTrialParameter<double> nearend_tuning_mask_lf_enr_transparent(
+      "nearend_tuning_mask_lf_enr_transparent",
+      adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
+  FieldTrialParameter<double> nearend_tuning_mask_lf_enr_suppress(
+      "nearend_tuning_mask_lf_enr_suppress",
+      adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
+  FieldTrialParameter<double> nearend_tuning_mask_hf_enr_transparent(
+      "nearend_tuning_mask_hf_enr_transparent",
+      adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
+  FieldTrialParameter<double> nearend_tuning_mask_hf_enr_suppress(
+      "nearend_tuning_mask_hf_enr_suppress",
+      adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
+  FieldTrialParameter<double> nearend_tuning_max_inc_factor(
+      "nearend_tuning_max_inc_factor",
+      adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
+  FieldTrialParameter<double> nearend_tuning_max_dec_factor_lf(
+      "nearend_tuning_max_dec_factor_lf",
+      adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
+  FieldTrialParameter<double> normal_tuning_mask_lf_enr_transparent(
+      "normal_tuning_mask_lf_enr_transparent",
+      adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
+  FieldTrialParameter<double> normal_tuning_mask_lf_enr_suppress(
+      "normal_tuning_mask_lf_enr_suppress",
+      adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
+  FieldTrialParameter<double> normal_tuning_mask_hf_enr_transparent(
+      "normal_tuning_mask_hf_enr_transparent",
+      adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
+  FieldTrialParameter<double> normal_tuning_mask_hf_enr_suppress(
+      "normal_tuning_mask_hf_enr_suppress",
+      adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
+  FieldTrialParameter<double> normal_tuning_max_inc_factor(
+      "normal_tuning_max_inc_factor",
+      adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
+  FieldTrialParameter<double> normal_tuning_max_dec_factor_lf(
+      "normal_tuning_max_dec_factor_lf",
+      adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
+  FieldTrialParameter<double> dominant_nearend_detection_enr_threshold(
+      "dominant_nearend_detection_enr_threshold",
+      adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
+  FieldTrialParameter<double> dominant_nearend_detection_enr_exit_threshold(
+      "dominant_nearend_detection_enr_exit_threshold",
+      adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
+  FieldTrialParameter<double> dominant_nearend_detection_snr_threshold(
+      "dominant_nearend_detection_snr_threshold",
+      adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
+  FieldTrialParameter<int> dominant_nearend_detection_hold_duration(
+      "dominant_nearend_detection_hold_duration",
+      adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
+  FieldTrialParameter<int> dominant_nearend_detection_trigger_threshold(
+      "dominant_nearend_detection_trigger_threshold",
+      adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
+
+  ParseFieldTrial(
+      {&nearend_tuning_mask_lf_enr_transparent,
+       &nearend_tuning_mask_lf_enr_suppress,
+       &nearend_tuning_mask_hf_enr_transparent,
+       &nearend_tuning_mask_hf_enr_suppress, &nearend_tuning_max_inc_factor,
+       &nearend_tuning_max_dec_factor_lf,
+       &normal_tuning_mask_lf_enr_transparent,
+       &normal_tuning_mask_lf_enr_suppress,
+       &normal_tuning_mask_hf_enr_transparent,
+       &normal_tuning_mask_hf_enr_suppress, &normal_tuning_max_inc_factor,
+       &normal_tuning_max_dec_factor_lf,
+       &dominant_nearend_detection_enr_threshold,
+       &dominant_nearend_detection_enr_exit_threshold,
+       &dominant_nearend_detection_snr_threshold,
+       &dominant_nearend_detection_hold_duration,
+       &dominant_nearend_detection_trigger_threshold},
+      suppressor_tuning_override_trial_name);
+
+  adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent =
+      static_cast<float>(nearend_tuning_mask_lf_enr_transparent.Get());
+  adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress =
+      static_cast<float>(nearend_tuning_mask_lf_enr_suppress.Get());
+  adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent =
+      static_cast<float>(nearend_tuning_mask_hf_enr_transparent.Get());
+  adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress =
+      static_cast<float>(nearend_tuning_mask_hf_enr_suppress.Get());
+  adjusted_cfg.suppressor.nearend_tuning.max_inc_factor =
+      static_cast<float>(nearend_tuning_max_inc_factor.Get());
+  adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf =
+      static_cast<float>(nearend_tuning_max_dec_factor_lf.Get());
+  adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent =
+      static_cast<float>(normal_tuning_mask_lf_enr_transparent.Get());
+  adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress =
+      static_cast<float>(normal_tuning_mask_lf_enr_suppress.Get());
+  adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent =
+      static_cast<float>(normal_tuning_mask_hf_enr_transparent.Get());
+  adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress =
+      static_cast<float>(normal_tuning_mask_hf_enr_suppress.Get());
+  adjusted_cfg.suppressor.normal_tuning.max_inc_factor =
+      static_cast<float>(normal_tuning_max_inc_factor.Get());
+  adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf =
+      static_cast<float>(normal_tuning_max_dec_factor_lf.Get());
+  adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold =
+      static_cast<float>(dominant_nearend_detection_enr_threshold.Get());
+  adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold =
+      static_cast<float>(dominant_nearend_detection_enr_exit_threshold.Get());
+  adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold =
+      static_cast<float>(dominant_nearend_detection_snr_threshold.Get());
+  adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration =
+      dominant_nearend_detection_hold_duration.Get();
+  adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold =
+      dominant_nearend_detection_trigger_threshold.Get();
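For reference, ParseFieldTrial() consumes a comma-separated "key:value" list and leaves unmatched parameters at their constructor defaults, which is what makes the wholesale override above a no-op when the trial is absent. A minimal sketch under that assumption (the default values here are placeholders, not the real config values):

  #include "rtc_base/experiments/field_trial_parser.h"

  void SketchSuppressorOverrideParsing() {
    webrtc::FieldTrialParameter<double> max_inc_factor(
        "nearend_tuning_max_inc_factor", /*default=*/2.0);
    webrtc::FieldTrialParameter<int> hold_duration(
        "dominant_nearend_detection_hold_duration", /*default=*/50);
    // Only one key appears in the trial string, so hold_duration keeps its
    // default of 50 while max_inc_factor becomes 2.5.
    webrtc::ParseFieldTrial({&max_inc_factor, &hold_duration},
                            "nearend_tuning_max_inc_factor:2.5");
  }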
+
+  // Field trial-based overrides of individual suppressor parameters.
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendLfMaskTransparentOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendLfMaskSuppressOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendHfMaskTransparentOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendHfMaskSuppressOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendMaxIncFactorOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNearendMaxDecFactorLfOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
+
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalLfMaskTransparentOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalLfMaskSuppressOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalHfMaskTransparentOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalHfMaskSuppressOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalMaxIncFactorOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorNormalMaxDecFactorLfOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
+
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendEnrThresholdOverride", 0.f, 100.f,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendEnrExitThresholdOverride", 0.f,
+      100.f,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendSnrThresholdOverride", 0.f, 100.f,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendHoldDurationOverride", 0, 1000,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorDominantNearendTriggerThresholdOverride", 0, 1000,
+      &adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
+
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f,
+      &adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain);
+
+  // Field trial-based overrides of individual delay estimator parameters.
+  RetrieveFieldTrialValue("WebRTC-Aec3DelayEstimateSmoothingOverride", 0.f,
+                          1.f, &adjusted_cfg.delay.delay_estimate_smoothing);
+  RetrieveFieldTrialValue(
+      "WebRTC-Aec3DelayEstimateSmoothingDelayFoundOverride", 0.f, 1.f,
+      &adjusted_cfg.delay.delay_estimate_smoothing_delay_found);
+
+  return adjusted_cfg;
+}
+
+class EchoCanceller3::RenderWriter {
+ public:
+  RenderWriter(ApmDataDumper* data_dumper,
+               const EchoCanceller3Config& config,
+               SwapQueue<std::vector<std::vector<std::vector<float>>>,
+                         Aec3RenderQueueItemVerifier>* render_transfer_queue,
+               size_t num_bands,
+               size_t num_channels);
+
+  RenderWriter() = delete;
+  RenderWriter(const RenderWriter&) = delete;
+  RenderWriter& operator=(const RenderWriter&) = delete;
+
+  ~RenderWriter();
+  void Insert(const AudioBuffer& input);
+
+ private:
+  ApmDataDumper* data_dumper_;
+  const size_t num_bands_;
+  const size_t num_channels_;
+  std::unique_ptr<HighPassFilter> high_pass_filter_;
+  std::vector<std::vector<std::vector<float>>> render_queue_input_frame_;
+  SwapQueue<std::vector<std::vector<std::vector<float>>>,
+            Aec3RenderQueueItemVerifier>* render_transfer_queue_;
+};
+
+EchoCanceller3::RenderWriter::RenderWriter(
+    ApmDataDumper* data_dumper,
+    const EchoCanceller3Config& config,
+    SwapQueue<std::vector<std::vector<std::vector<float>>>,
+              Aec3RenderQueueItemVerifier>* render_transfer_queue,
+    size_t num_bands,
+    size_t num_channels)
+    : data_dumper_(data_dumper),
+      num_bands_(num_bands),
+      num_channels_(num_channels),
+      render_queue_input_frame_(
+          num_bands_,
+          std::vector<std::vector<float>>(
+              num_channels_,
+              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
+      render_transfer_queue_(render_transfer_queue) {
+  RTC_DCHECK(data_dumper);
+  if (config.filter.high_pass_filter_echo_reference) {
+    high_pass_filter_ = std::make_unique<HighPassFilter>(16000, num_channels);
+  }
+}
+
+EchoCanceller3::RenderWriter::~RenderWriter() = default;
+
+void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
+  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band());
+  RTC_DCHECK_EQ(num_bands_, input.num_bands());
+  RTC_DCHECK_EQ(num_channels_, input.num_channels());
+
+  // TODO(bugs.webrtc.org/8759) Temporary work-around.
+  if (num_bands_ != input.num_bands())
+    return;
+
+  data_dumper_->DumpWav("aec3_render_input", AudioBuffer::kSplitBandSize,
+                        &input.split_bands_const(0)[0][0], 16000, 1);
+
+  CopyBufferIntoFrame(input, num_bands_, num_channels_,
+                      &render_queue_input_frame_);
+  if (high_pass_filter_) {
+    high_pass_filter_->Process(&render_queue_input_frame_[0]);
+  }
+
+  static_cast<void>(
+      render_transfer_queue_->Insert(&render_queue_input_frame_));
+}
+
+std::atomic<int> EchoCanceller3::instance_count_(0);
+
+EchoCanceller3::EchoCanceller3(
+    const EchoCanceller3Config& config,
+    const absl::optional<EchoCanceller3Config>& multichannel_config,
+    int sample_rate_hz,
+    size_t num_render_channels,
+    size_t num_capture_channels)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      config_(AdjustConfig(config)),
+      sample_rate_hz_(sample_rate_hz),
+      num_bands_(NumBandsForRate(sample_rate_hz_)),
+      num_render_input_channels_(num_render_channels),
+      num_capture_channels_(num_capture_channels),
+      config_selector_(AdjustConfig(config),
+                       multichannel_config,
+                       num_render_input_channels_),
+      multichannel_content_detector_(
+          config_selector_.active_config().multi_channel.detect_stereo_content,
+          num_render_input_channels_,
+          config_selector_.active_config()
+              .multi_channel.stereo_detection_threshold,
+          config_selector_.active_config()
+              .multi_channel.stereo_detection_timeout_threshold_seconds,
+          config_selector_.active_config()
+              .multi_channel.stereo_detection_hysteresis_seconds),
+      output_framer_(num_bands_, num_capture_channels_),
+      capture_blocker_(num_bands_, num_capture_channels_),
+      render_transfer_queue_(
+          kRenderTransferQueueSizeFrames,
+          std::vector<std::vector<std::vector<float>>>(
+              num_bands_,
+              std::vector<std::vector<float>>(
+                  num_render_input_channels_,
+                  std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
+          Aec3RenderQueueItemVerifier(num_bands_,
+                                      num_render_input_channels_,
+                                      AudioBuffer::kSplitBandSize)),
+      render_queue_output_frame_(
+          num_bands_,
+          std::vector<std::vector<float>>(
+              num_render_input_channels_,
+              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
+      render_block_(num_bands_, num_render_input_channels_),
+      capture_block_(num_bands_, num_capture_channels_),
+      capture_sub_frame_view_(
+          num_bands_,
+          std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
+
+  if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
+    block_delay_buffer_.reset(new BlockDelayBuffer(
+        num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
+        config_.delay.fixed_capture_delay_samples));
+  }
+
+  render_writer_.reset(new RenderWriter(
+      data_dumper_.get(), config_selector_.active_config(),
+      &render_transfer_queue_, num_bands_, num_render_input_channels_));
+
+  RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
+  RTC_DCHECK_GE(kMaxNumBands, num_bands_);
+
+  if (config_selector_.active_config().filter.export_linear_aec_output) {
+    linear_output_framer_.reset(
+        new BlockFramer(/*num_bands=*/1, num_capture_channels_));
+    linear_output_block_ =
+        std::make_unique<Block>(/*num_bands=*/1, num_capture_channels_);
+    linear_output_sub_frame_view_ =
+        std::vector<std::vector<rtc::ArrayView<float>>>(
+            1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
+  }
+
+  Initialize();
+
+  RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_
+                   << " Hz, num render channels: "
+                   << num_render_input_channels_
+                   << ", num capture channels: " << num_capture_channels_;
+}
+
+EchoCanceller3::~EchoCanceller3() = default;
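A sketch of the intended call pattern implied by the constructor and the render writer above: AnalyzeRender() only copies into the swap queue and may run on the render thread, while the capture-side calls stay serialized on the capture thread. The glue functions below are illustrative, not part of the API; band splitting and merging is only needed for rates above 16 kHz.

  #include "modules/audio_processing/aec3/echo_canceller3.h"
  #include "modules/audio_processing/audio_buffer.h"

  // Render thread: cheap, enqueues a copy for the capture side.
  void OnRenderFrame(webrtc::EchoCanceller3& aec3,
                     webrtc::AudioBuffer& render) {
    aec3.AnalyzeRender(&render);
  }

  // Capture thread: all remaining calls must stay on one thread.
  void OnCaptureFrame(webrtc::EchoCanceller3& aec3,
                      webrtc::AudioBuffer& capture) {
    aec3.AnalyzeCapture(&capture);      // Full-band saturation analysis.
    capture.SplitIntoFrequencyBands();  // AEC3 processes split bands.
    aec3.ProcessCapture(&capture, /*level_change=*/false);
    capture.MergeFrequencyBands();
  }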
+
+void EchoCanceller3::Initialize() {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+
+  num_render_channels_to_aec_ =
+      multichannel_content_detector_.IsProperMultiChannelContentDetected()
+          ? num_render_input_channels_
+          : 1;
+
+  config_selector_.Update(
+      multichannel_content_detector_.IsProperMultiChannelContentDetected());
+
+  render_block_.SetNumChannels(num_render_channels_to_aec_);
+
+  render_blocker_.reset(
+      new FrameBlocker(num_bands_, num_render_channels_to_aec_));
+
+  block_processor_.reset(BlockProcessor::Create(
+      config_selector_.active_config(), sample_rate_hz_,
+      num_render_channels_to_aec_, num_capture_channels_));
+
+  render_sub_frame_view_ = std::vector<std::vector<rtc::ArrayView<float>>>(
+      num_bands_,
+      std::vector<rtc::ArrayView<float>>(num_render_channels_to_aec_));
+}
+
+void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
+  RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
+
+  RTC_DCHECK_EQ(render.num_channels(), num_render_input_channels_);
+  data_dumper_->DumpRaw("aec3_call_order",
+                        static_cast<int>(EchoCanceller3ApiCall::kRender));
+
+  return render_writer_->Insert(render);
+}
+
+void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  data_dumper_->DumpWav("aec3_capture_analyze_input", capture.num_frames(),
+                        capture.channels_const()[0], sample_rate_hz_, 1);
+  saturated_microphone_signal_ = false;
+  for (size_t channel = 0; channel < capture.num_channels(); ++channel) {
+    saturated_microphone_signal_ |=
+        DetectSaturation(rtc::ArrayView<const float>(
+            capture.channels_const()[channel], capture.num_frames()));
+    if (saturated_microphone_signal_) {
+      break;
+    }
+  }
+}
+
+void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) {
+  ProcessCapture(capture, nullptr, level_change);
+}
+
+void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
+                                    AudioBuffer* linear_output,
+                                    bool level_change) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  RTC_DCHECK(capture);
+  RTC_DCHECK_EQ(num_bands_, capture->num_bands());
+  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band());
+  RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_);
+  data_dumper_->DumpRaw("aec3_call_order",
+                        static_cast<int>(EchoCanceller3ApiCall::kCapture));
+
+  if (linear_output && !linear_output_framer_) {
+    RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without "
+                         "properly configuring AEC3.";
+    RTC_DCHECK_NOTREACHED();
+  }
+
+  // Report capture call in the metrics and periodically update API call
+  // metrics.
+  api_call_metrics_.ReportCaptureCall();
+
+  // Optionally delay the capture signal.
+  if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
+    RTC_DCHECK(block_delay_buffer_);
+    block_delay_buffer_->DelaySignal(capture);
+  }
+
+  rtc::ArrayView<float> capture_lower_band = rtc::ArrayView<float>(
+      &capture->split_bands(0)[0][0], AudioBuffer::kSplitBandSize);
+
+  data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, 16000, 1);
+
+  EmptyRenderQueue();
+
+  ProcessCaptureFrameContent(
+      linear_output, capture, level_change,
+      multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
+      saturated_microphone_signal_, 0, &capture_blocker_,
+      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
+      linear_output_block_.get(), &linear_output_sub_frame_view_,
+      &capture_block_, &capture_sub_frame_view_);
+
+  ProcessCaptureFrameContent(
+      linear_output, capture, level_change,
+      multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
+      saturated_microphone_signal_, 1, &capture_blocker_,
+      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
+      linear_output_block_.get(), &linear_output_sub_frame_view_,
+      &capture_block_, &capture_sub_frame_view_);
+
+  ProcessRemainingCaptureFrameContent(
+      level_change,
+      multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
+      saturated_microphone_signal_, &capture_blocker_,
+      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
+      linear_output_block_.get(), &capture_block_);
+
+  data_dumper_->DumpWav("aec3_capture_output", AudioBuffer::kSplitBandSize,
+                        &capture->split_bands(0)[0][0], 16000, 1);
+}
+
+EchoControl::Metrics EchoCanceller3::GetMetrics() const {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  Metrics metrics;
+  block_processor_->GetMetrics(&metrics);
+  return metrics;
+}
+
+void EchoCanceller3::SetAudioBufferDelay(int delay_ms) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  block_processor_->SetAudioBufferDelay(delay_ms);
+}
+
+void EchoCanceller3::SetCaptureOutputUsage(bool capture_output_used) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  block_processor_->SetCaptureOutputUsage(capture_output_used);
+}
+
+bool EchoCanceller3::ActiveProcessing() const {
+  return true;
+}
+
+EchoCanceller3Config EchoCanceller3::CreateDefaultMultichannelConfig() {
+  EchoCanceller3Config cfg;
+  // Use a shorter and more rapidly adapting coarse filter to compensate for
+  // the increased number of total filter parameters to adapt.
+  cfg.filter.coarse.length_blocks = 11;
+  cfg.filter.coarse.rate = 0.95f;
+  cfg.filter.coarse_initial.length_blocks = 11;
+  cfg.filter.coarse_initial.rate = 0.95f;
+
+  // Use more conservative suppressor behavior for non-nearend speech.
+  cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
+  cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
+  return cfg;
+}
+
+void EchoCanceller3::SetBlockProcessorForTesting(
+    std::unique_ptr<BlockProcessor> block_processor) {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  RTC_DCHECK(block_processor);
+  block_processor_ = std::move(block_processor);
+}
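Putting the constructor and CreateDefaultMultichannelConfig() together, a minimal construction sketch (the 48 kHz stereo-render setup here is chosen purely for illustration):

  #include <memory>
  #include "absl/types/optional.h"
  #include "api/audio/echo_canceller3_config.h"
  #include "modules/audio_processing/aec3/echo_canceller3.h"

  std::unique_ptr<webrtc::EchoCanceller3> MakeStereoAec3() {
    webrtc::EchoCanceller3Config config;  // Default tuning.
    // Only applied while proper stereo render content is detected.
    absl::optional<webrtc::EchoCanceller3Config> multichannel_config =
        webrtc::EchoCanceller3::CreateDefaultMultichannelConfig();
    return std::make_unique<webrtc::EchoCanceller3>(
        config, multichannel_config, /*sample_rate_hz=*/48000,
        /*num_render_channels=*/2, /*num_capture_channels=*/1);
  }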
+
+void EchoCanceller3::EmptyRenderQueue() {
+  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+  bool frame_to_buffer =
+      render_transfer_queue_.Remove(&render_queue_output_frame_);
+  while (frame_to_buffer) {
+    // Report render call in the metrics.
+    api_call_metrics_.ReportRenderCall();
+
+    if (multichannel_content_detector_.UpdateDetection(
+            render_queue_output_frame_)) {
+      // Reinitialize the AEC when proper stereo is detected.
+      Initialize();
+    }
+
+    // Buffer frame content.
+    BufferRenderFrameContent(
+        /*proper_downmix_needed=*/multichannel_content_detector_
+            .IsTemporaryMultiChannelContentDetected(),
+        &render_queue_output_frame_, 0, render_blocker_.get(),
+        block_processor_.get(), &render_block_, &render_sub_frame_view_);
+
+    BufferRenderFrameContent(
+        /*proper_downmix_needed=*/multichannel_content_detector_
+            .IsTemporaryMultiChannelContentDetected(),
+        &render_queue_output_frame_, 1, render_blocker_.get(),
+        block_processor_.get(), &render_block_, &render_sub_frame_view_);
+
+    BufferRemainingRenderFrameContent(render_blocker_.get(),
+                                      block_processor_.get(), &render_block_);
+
+    frame_to_buffer =
+        render_transfer_queue_.Remove(&render_queue_output_frame_);
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.h
new file mode 100644
index 0000000000..7bf8e51a4b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.h
@@ -0,0 +1,230 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
+
+#include <stddef.h>
+
+#include <atomic>
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "api/audio/echo_control.h"
+#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
+#include "modules/audio_processing/aec3/block_delay_buffer.h"
+#include "modules/audio_processing/aec3/block_framer.h"
+#include "modules/audio_processing/aec3/block_processor.h"
+#include "modules/audio_processing/aec3/config_selector.h"
+#include "modules/audio_processing/aec3/frame_blocker.h"
+#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/race_checker.h"
+#include "rtc_base/swap_queue.h"
+#include "rtc_base/thread_annotations.h"
+
+namespace webrtc {
+
+// Method for adjusting config parameter dependencies.
+// Only to be used externally to AEC3 for testing purposes.
+// TODO(webrtc:5298): Move this to a separate file.
+EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config);
+
+// Functor for verifying the invariance of the frames being put into the render
+// queue.
+class Aec3RenderQueueItemVerifier {
+ public:
+  Aec3RenderQueueItemVerifier(size_t num_bands,
+                              size_t num_channels,
+                              size_t frame_length)
+      : num_bands_(num_bands),
+        num_channels_(num_channels),
+        frame_length_(frame_length) {}
+
+  bool operator()(
+      const std::vector<std::vector<std::vector<float>>>& v) const {
+    if (v.size() != num_bands_) {
+      return false;
+    }
+    for (const auto& band : v) {
+      if (band.size() != num_channels_) {
+        return false;
+      }
+      for (const auto& channel : band) {
+        if (channel.size() != frame_length_) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+ private:
+  const size_t num_bands_;
+  const size_t num_channels_;
+  const size_t frame_length_;
+};
+
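As a concrete illustration of the invariant enforced by this functor, a frame of exactly bands x channels x frame_length passes, and corrupting any one dimension makes it fail; the dimensions below are illustrative:

  #include <cstddef>
  #include <vector>
  #include "modules/audio_processing/aec3/echo_canceller3.h"

  void SketchVerifier() {
    constexpr size_t kBands = 3, kChannels = 2, kFrameLength = 160;
    webrtc::Aec3RenderQueueItemVerifier verifier(kBands, kChannels,
                                                 kFrameLength);
    std::vector<std::vector<std::vector<float>>> frame(
        kBands, std::vector<std::vector<float>>(
                    kChannels, std::vector<float>(kFrameLength, 0.f)));
    bool ok = verifier(frame);  // true: the shape matches.
    frame[0][1].resize(kFrameLength - 1);
    bool corrupted = verifier(frame);  // false: one channel is too short.
    (void)ok;
    (void)corrupted;
  }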
+// Main class for the echo canceller3.
+// It does three things:
+// -Receives 10 ms frames of band-split audio.
+// -Provides the lower level echo canceller functionality with
+//  blocks of 64 samples of audio data.
+// -Partially handles the jitter in the render and capture API
+//  call sequence.
+//
+// The class is supposed to be used in a non-concurrent manner apart from the
+// AnalyzeRender call which can be called concurrently with the other methods.
+class EchoCanceller3 : public EchoControl {
+ public:
+  EchoCanceller3(
+      const EchoCanceller3Config& config,
+      const absl::optional<EchoCanceller3Config>& multichannel_config,
+      int sample_rate_hz,
+      size_t num_render_channels,
+      size_t num_capture_channels);
+
+  ~EchoCanceller3() override;
+
+  EchoCanceller3(const EchoCanceller3&) = delete;
+  EchoCanceller3& operator=(const EchoCanceller3&) = delete;
+
+  // Analyzes and stores an internal copy of the split-band domain render
+  // signal.
+  void AnalyzeRender(AudioBuffer* render) override { AnalyzeRender(*render); }
+  // Analyzes the full-band domain capture signal to detect signal saturation.
+  void AnalyzeCapture(AudioBuffer* capture) override {
+    AnalyzeCapture(*capture);
+  }
+  // Processes the split-band domain capture signal in order to remove any echo
+  // present in the signal.
+  void ProcessCapture(AudioBuffer* capture, bool level_change) override;
+  // As above, but also returns the linear filter output.
+  void ProcessCapture(AudioBuffer* capture,
+                      AudioBuffer* linear_output,
+                      bool level_change) override;
+  // Collect current metrics from the echo canceller.
+  Metrics GetMetrics() const override;
+  // Provides an optional external estimate of the audio buffer delay.
+  void SetAudioBufferDelay(int delay_ms) override;
+
+  // Specifies whether the capture output will be used. The purpose of this is
+  // to allow the echo controller to deactivate some of the processing when the
+  // resulting output is anyway not used, for instance when the endpoint is
+  // muted.
+  void SetCaptureOutputUsage(bool capture_output_used) override;
+
+  bool ActiveProcessing() const override;
+
+  // Signals whether an external detector has detected echo leakage from the
+  // echo canceller.
+  // Note that in the case echo leakage has been flagged, it should be
+  // unflagged once it is no longer occurring.
+  void UpdateEchoLeakageStatus(bool leakage_detected) {
+    RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
+    block_processor_->UpdateEchoLeakageStatus(leakage_detected);
+  }
+
+  // Produces a default configuration for multichannel.
+  static EchoCanceller3Config CreateDefaultMultichannelConfig();
+
+ private:
+  friend class EchoCanceller3Tester;
+  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo);
+  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+                           DetectionOfProperStereoUsingThreshold);
+  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+                           DetectionOfProperStereoUsingHysteresis);
+  FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
+                           StereoContentDetectionForMonoSignals);
+
+  class RenderWriter;
+
+  // (Re-)Initializes the selected subset of the EchoCanceller3 fields, at
+  // creation as well as during reconfiguration.
+  void Initialize();
+
+  // Only for testing. Replaces the internal block processor.
+  void SetBlockProcessorForTesting(
+      std::unique_ptr<BlockProcessor> block_processor);
+
+  // Only for testing. Returns whether stereo processing is active.
+  bool StereoRenderProcessingActiveForTesting() const {
+    return multichannel_content_detector_
+        .IsProperMultiChannelContentDetected();
+  }
+
+  // Only for testing.
+  const EchoCanceller3Config& GetActiveConfigForTesting() const {
+    return config_selector_.active_config();
+  }
+
+  // Empties the render SwapQueue.
+  void EmptyRenderQueue();
+
+  // Analyzes and stores an internal copy of the split-band domain render
+  // signal.
+  void AnalyzeRender(const AudioBuffer& render);
+  // Analyzes the full-band domain capture signal to detect signal saturation.
+  void AnalyzeCapture(const AudioBuffer& capture);
+
+  rtc::RaceChecker capture_race_checker_;
+  rtc::RaceChecker render_race_checker_;
+
+  // State that is accessed by the AnalyzeRender call.
+  std::unique_ptr<RenderWriter> render_writer_
+      RTC_GUARDED_BY(render_race_checker_);
+
+  // State that may be accessed by the capture thread.
+  static std::atomic<int> instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const EchoCanceller3Config config_;
+  const int sample_rate_hz_;
+  const int num_bands_;
+  const size_t num_render_input_channels_;
+  size_t num_render_channels_to_aec_;
+  const size_t num_capture_channels_;
+  ConfigSelector config_selector_;
+  MultiChannelContentDetector multichannel_content_detector_;
+  std::unique_ptr<BlockFramer> linear_output_framer_
+      RTC_GUARDED_BY(capture_race_checker_);
+  BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_);
+  FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_);
+  std::unique_ptr<FrameBlocker> render_blocker_
+      RTC_GUARDED_BY(capture_race_checker_);
+  SwapQueue<std::vector<std::vector<std::vector<float>>>,
+            Aec3RenderQueueItemVerifier>
+      render_transfer_queue_;
+  std::unique_ptr<BlockProcessor> block_processor_
+      RTC_GUARDED_BY(capture_race_checker_);
+  std::vector<std::vector<std::vector<float>>> render_queue_output_frame_
+      RTC_GUARDED_BY(capture_race_checker_);
+  bool saturated_microphone_signal_ RTC_GUARDED_BY(capture_race_checker_) =
+      false;
+  Block render_block_ RTC_GUARDED_BY(capture_race_checker_);
+  std::unique_ptr<Block> linear_output_block_
+      RTC_GUARDED_BY(capture_race_checker_);
+  Block capture_block_ RTC_GUARDED_BY(capture_race_checker_);
+  std::vector<std::vector<rtc::ArrayView<float>>> render_sub_frame_view_
+      RTC_GUARDED_BY(capture_race_checker_);
+  std::vector<std::vector<rtc::ArrayView<float>>> linear_output_sub_frame_view_
+      RTC_GUARDED_BY(capture_race_checker_);
+  std::vector<std::vector<rtc::ArrayView<float>>> capture_sub_frame_view_
+      RTC_GUARDED_BY(capture_race_checker_);
+  std::unique_ptr<BlockDelayBuffer> block_delay_buffer_
+      RTC_GUARDED_BY(capture_race_checker_);
+  ApiCallJitterMetrics api_call_metrics_ RTC_GUARDED_BY(capture_race_checker_);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc
new file mode 100644
index 0000000000..ad126af4d3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc
@@ -0,0 +1,1160 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/echo_canceller3.h"
+
+#include <deque>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block_processor.h"
+#include "modules/audio_processing/aec3/frame_blocker.h"
+#include "modules/audio_processing/aec3/mock/mock_block_processor.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/high_pass_filter.h"
+#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::_;
+using ::testing::StrictMock;
+
+// Populates the frame with linearly increasing sample values for each band,
+// with a band-specific offset, in order to allow simple bitexactness
+// verification for each band.
+void PopulateInputFrame(size_t frame_length,
+                        size_t num_bands,
+                        size_t frame_index,
+                        float* const* frame,
+                        int offset) {
+  for (size_t k = 0; k < num_bands; ++k) {
+    for (size_t i = 0; i < frame_length; ++i) {
+      float value =
+          static_cast<float>(frame_index * frame_length + i) + offset;
+      frame[k][i] = (value > 0 ? 5000 * k + value : 0);
+    }
+  }
+}
+
+// Populates the frame with linearly increasing sample values.
+void PopulateInputFrame(size_t frame_length,
+                        size_t frame_index,
+                        float* frame,
+                        int offset) {
+  for (size_t i = 0; i < frame_length; ++i) {
+    float value = static_cast<float>(frame_index * frame_length + i) + offset;
+    frame[i] = std::max(value, 0.f);
+  }
+}
+
+// Verifies that the samples in the output frame are identical to the samples
+// that were produced for the input frame, with an offset in order to
+// compensate for buffering delays.
+bool VerifyOutputFrameBitexactness(size_t frame_length,
+                                   size_t num_bands,
+                                   size_t frame_index,
+                                   const float* const* frame,
+                                   int offset) {
+  float reference_frame_data[kMaxNumBands][2 * kSubFrameLength];
+  float* reference_frame[kMaxNumBands];
+  for (size_t k = 0; k < num_bands; ++k) {
+    reference_frame[k] = &reference_frame_data[k][0];
+  }
+
+  PopulateInputFrame(frame_length, num_bands, frame_index, reference_frame,
+                     offset);
+  for (size_t k = 0; k < num_bands; ++k) {
+    for (size_t i = 0; i < frame_length; ++i) {
+      if (reference_frame[k][i] != frame[k][i]) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+bool VerifyOutputFrameBitexactness(rtc::ArrayView<const float> reference,
+                                   rtc::ArrayView<const float> frame,
+                                   int offset) {
+  for (size_t k = 0; k < frame.size(); ++k) {
+    int reference_index = static_cast<int>(k) + offset;
+    if (reference_index >= 0) {
+      if (reference[reference_index] != frame[k]) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
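A small worked example of the band-offset scheme above, assuming the helpers are called from within this translation unit: for band k, sample i of frame n becomes 5000*k + (n*frame_length + i + offset), floored at zero, so every band carries a distinct ramp that survives bit-exact comparison.

  void SketchPopulateInputFrame() {
    float band0[160] = {0.f};
    float band1[160] = {0.f};
    float* bands[] = {band0, band1};
    // value = 2 * 160 + 0 + (-64) = 256, so sample 0 becomes 256 in band 0
    // and 5000 * 1 + 256 = 5256 in band 1.
    PopulateInputFrame(/*frame_length=*/160, /*num_bands=*/2,
                       /*frame_index=*/2, bands, /*offset=*/-64);
  }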
+
+// Class for testing that the capture data is properly received by the block
+// processor and that the processor data is properly passed to the
+// EchoCanceller3 output.
+class CaptureTransportVerificationProcessor : public BlockProcessor {
+ public:
+  explicit CaptureTransportVerificationProcessor(size_t num_bands) {}
+
+  CaptureTransportVerificationProcessor() = delete;
+  CaptureTransportVerificationProcessor(
+      const CaptureTransportVerificationProcessor&) = delete;
+  CaptureTransportVerificationProcessor& operator=(
+      const CaptureTransportVerificationProcessor&) = delete;
+
+  ~CaptureTransportVerificationProcessor() override = default;
+
+  void ProcessCapture(bool level_change,
+                      bool saturated_microphone_signal,
+                      Block* linear_output,
+                      Block* capture_block) override {}
+
+  void BufferRender(const Block& block) override {}
+
+  void UpdateEchoLeakageStatus(bool leakage_detected) override {}
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override {}
+
+  void SetAudioBufferDelay(int delay_ms) override {}
+
+  void SetCaptureOutputUsage(bool capture_output_used) {}
+};
+
+// Class for testing that the render data is properly received by the block
+// processor.
+class RenderTransportVerificationProcessor : public BlockProcessor {
+ public:
+  explicit RenderTransportVerificationProcessor(size_t num_bands) {}
+
+  RenderTransportVerificationProcessor() = delete;
+  RenderTransportVerificationProcessor(
+      const RenderTransportVerificationProcessor&) = delete;
+  RenderTransportVerificationProcessor& operator=(
+      const RenderTransportVerificationProcessor&) = delete;
+
+  ~RenderTransportVerificationProcessor() override = default;
+
+  void ProcessCapture(bool level_change,
+                      bool saturated_microphone_signal,
+                      Block* linear_output,
+                      Block* capture_block) override {
+    Block render_block = received_render_blocks_.front();
+    received_render_blocks_.pop_front();
+    capture_block->Swap(render_block);
+  }
+
+  void BufferRender(const Block& block) override {
+    received_render_blocks_.push_back(block);
+  }
+
+  void UpdateEchoLeakageStatus(bool leakage_detected) override {}
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override {}
+
+  void SetAudioBufferDelay(int delay_ms) override {}
+
+  void SetCaptureOutputUsage(bool capture_output_used) {}
+
+ private:
+  std::deque<Block> received_render_blocks_;
+};
+
+std::string ProduceDebugText(int sample_rate_hz) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz;
+  return ss.Release();
+}
+
+std::string ProduceDebugText(int sample_rate_hz, int variant) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz << ", variant: " << variant;
+  return ss.Release();
+}
+
+void RunAecInStereo(AudioBuffer& buffer,
+                    EchoCanceller3& aec3,
+                    float channel_0_value,
+                    float channel_1_value) {
+  rtc::ArrayView<float> data_channel_0(&buffer.channels()[0][0],
+                                       buffer.num_frames());
+  std::fill(data_channel_0.begin(), data_channel_0.end(), channel_0_value);
+  rtc::ArrayView<float> data_channel_1(&buffer.channels()[1][0],
+                                       buffer.num_frames());
+  std::fill(data_channel_1.begin(), data_channel_1.end(), channel_1_value);
+  aec3.AnalyzeRender(&buffer);
+  aec3.AnalyzeCapture(&buffer);
+  aec3.ProcessCapture(&buffer, /*level_change=*/false);
+}
+
+void RunAecInSMono(AudioBuffer& buffer,
+                   EchoCanceller3& aec3,
+                   float channel_0_value) {
+  rtc::ArrayView<float> data_channel_0(&buffer.channels()[0][0],
+                                       buffer.num_frames());
+  std::fill(data_channel_0.begin(), data_channel_0.end(), channel_0_value);
+  aec3.AnalyzeRender(&buffer);
+  aec3.AnalyzeCapture(&buffer);
+  aec3.ProcessCapture(&buffer, /*level_change=*/false);
+}
+
+}  // namespace
+
+class EchoCanceller3Tester {
+ public:
+  explicit EchoCanceller3Tester(int sample_rate_hz)
+      : sample_rate_hz_(sample_rate_hz),
+        num_bands_(NumBandsForRate(sample_rate_hz_)),
+        frame_length_(160),
+        fullband_frame_length_(rtc::CheckedDivExact(sample_rate_hz_, 100)),
+        capture_buffer_(fullband_frame_length_ * 100,
+                        1,
+                        fullband_frame_length_ * 100,
+                        1,
+                        fullband_frame_length_ * 100,
+                        1),
+        render_buffer_(fullband_frame_length_ * 100,
+                       1,
+                       fullband_frame_length_ * 100,
+                       1,
+                       fullband_frame_length_ * 100,
+                       1) {}
+
+  EchoCanceller3Tester() = delete;
+  EchoCanceller3Tester(const EchoCanceller3Tester&) = delete;
+  EchoCanceller3Tester& operator=(const EchoCanceller3Tester&) = delete;
+
+  // Verifies that the capture data is properly received by the block processor
+  // and that the processor data is properly passed to the EchoCanceller3
+  // output.
+  void RunCaptureTransportVerificationTest() {
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(
+        std::make_unique<CaptureTransportVerificationProcessor>(num_bands_));
+
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+      PopulateInputFrame(frame_length_, frame_index,
+                         &render_buffer_.channels()[0][0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+      EXPECT_TRUE(VerifyOutputFrameBitexactness(
+          frame_length_, num_bands_, frame_index,
+          &capture_buffer_.split_bands(0)[0], -64));
+    }
+  }
+
+  // Test method for testing that the render data is properly received by the
+  // block processor.
+  void RunRenderTransportVerificationTest() {
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(
+        std::make_unique<RenderTransportVerificationProcessor>(num_bands_));
+
+    std::vector<std::vector<float>> render_input(1);
+    std::vector<float> capture_output;
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 100);
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &render_buffer_.split_bands(0)[0], 0);
+
+      for (size_t k = 0; k < frame_length_; ++k) {
+        render_input[0].push_back(render_buffer_.split_bands(0)[0][k]);
+      }
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+      for (size_t k = 0; k < frame_length_; ++k) {
+        capture_output.push_back(capture_buffer_.split_bands(0)[0][k]);
+      }
+    }
+
+    EXPECT_TRUE(
+        VerifyOutputFrameBitexactness(render_input[0], capture_output, -64));
+  }
+
+  // Verifies that information about echo path changes are properly propagated
+  // to the block processor.
+  // The cases tested are:
+  // -That no set echo path change flags are received when there is no echo
+  //  path change.
+  // -That set echo path change flags are received and continues to be received
+  //  as long as echo path changes are flagged.
+  // -That set echo path change flags are no longer received when echo path
+  //  change events stop being flagged.
+  enum class EchoPathChangeTestVariant { kNone, kOneSticky, kOneNonSticky };
+
+  void RunEchoPathChangeVerificationTest(
+      EchoPathChangeTestVariant echo_path_change_test_variant) {
+    constexpr size_t kNumFullBlocksPerFrame = 160 / kBlockSize;
+    constexpr size_t kExpectedNumBlocksToProcess =
+        (kNumFramesToProcess * 160) / kBlockSize;
+    std::unique_ptr<StrictMock<webrtc::test::MockBlockProcessor>>
+        block_processor_mock(
+            new StrictMock<webrtc::test::MockBlockProcessor>());
+    EXPECT_CALL(*block_processor_mock, BufferRender(_))
+        .Times(kExpectedNumBlocksToProcess);
+    EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(_)).Times(0);
+
+    switch (echo_path_change_test_variant) {
+      case EchoPathChangeTestVariant::kNone:
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(false, _, _, _))
+            .Times(kExpectedNumBlocksToProcess);
+        break;
+      case EchoPathChangeTestVariant::kOneSticky:
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(true, _, _, _))
+            .Times(kExpectedNumBlocksToProcess);
+        break;
+      case EchoPathChangeTestVariant::kOneNonSticky:
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(true, _, _, _))
+            .Times(kNumFullBlocksPerFrame);
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(false, _, _, _))
+            .Times(kExpectedNumBlocksToProcess - kNumFullBlocksPerFrame);
+        break;
+    }
+
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
+
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      bool echo_path_change = false;
+      switch (echo_path_change_test_variant) {
+        case EchoPathChangeTestVariant::kNone:
+          break;
+        case EchoPathChangeTestVariant::kOneSticky:
+          echo_path_change = true;
+          break;
+        case EchoPathChangeTestVariant::kOneNonSticky:
+          if (frame_index == 0) {
+            echo_path_change = true;
+          }
+          break;
+      }
+
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+      PopulateInputFrame(frame_length_, frame_index,
+                         &render_buffer_.channels()[0][0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, echo_path_change);
+    }
+  }
+
+  // Test for verifying that echo leakage information is being properly passed
+  // to the processor.
+  // The cases tested are:
+  // -That no method calls are received when they should not.
+  // -That false values are received each time they are flagged.
+  // -That true values are received each time they are flagged.
+  // -That a false value is received when flagged after a true value has been
+  //  flagged.
+  enum class EchoLeakageTestVariant {
+    kNone,
+    kFalseSticky,
+    kTrueSticky,
+    kTrueNonSticky
+  };
+
+  void RunEchoLeakageVerificationTest(
+      EchoLeakageTestVariant leakage_report_variant) {
+    constexpr size_t kExpectedNumBlocksToProcess =
+        (kNumFramesToProcess * 160) / kBlockSize;
+    std::unique_ptr<StrictMock<webrtc::test::MockBlockProcessor>>
+        block_processor_mock(
+            new StrictMock<webrtc::test::MockBlockProcessor>());
+    EXPECT_CALL(*block_processor_mock, BufferRender(_))
+        .Times(kExpectedNumBlocksToProcess);
+    EXPECT_CALL(*block_processor_mock, ProcessCapture(_, _, _, _))
+        .Times(kExpectedNumBlocksToProcess);
+
+    switch (leakage_report_variant) {
+      case EchoLeakageTestVariant::kNone:
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(_)).Times(0);
+        break;
+      case EchoLeakageTestVariant::kFalseSticky:
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(false))
+            .Times(1);
+        break;
+      case EchoLeakageTestVariant::kTrueSticky:
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(true))
+            .Times(1);
+        break;
+      case EchoLeakageTestVariant::kTrueNonSticky: {
+        ::testing::InSequence s;
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(true))
+            .Times(1);
+        EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(false))
+            .Times(kNumFramesToProcess - 1);
+      } break;
+    }
+
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
+
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      switch (leakage_report_variant) {
+        case EchoLeakageTestVariant::kNone:
+          break;
+        case EchoLeakageTestVariant::kFalseSticky:
+          if (frame_index == 0) {
+            aec3.UpdateEchoLeakageStatus(false);
+          }
+          break;
+        case EchoLeakageTestVariant::kTrueSticky:
+          if (frame_index == 0) {
+            aec3.UpdateEchoLeakageStatus(true);
+          }
+          break;
+        case EchoLeakageTestVariant::kTrueNonSticky:
+          if (frame_index == 0) {
+            aec3.UpdateEchoLeakageStatus(true);
+          } else {
+            aec3.UpdateEchoLeakageStatus(false);
+          }
+          break;
+      }
+
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+      PopulateInputFrame(frame_length_, frame_index,
+                         &render_buffer_.channels()[0][0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+    }
+  }
+
+  // This verifies that saturation information is properly passed to the
+  // BlockProcessor.
+  // The cases tested are:
+  // -That no saturation event is passed to the processor if there is no
+  //  saturation.
+  // -That one frame with one negative saturated sample value is reported to be
+  //  saturated and that following non-saturated frames are properly reported
+  //  as not being saturated.
+  // -That one frame with one positive saturated sample value is reported to be
+  //  saturated and that following non-saturated frames are properly reported
+  //  as not being saturated.
+  enum class SaturationTestVariant { kNone, kOneNegative, kOnePositive };
+
+  void RunCaptureSaturationVerificationTest(
+      SaturationTestVariant saturation_variant) {
+    const size_t kNumFullBlocksPerFrame = 160 / kBlockSize;
+    const size_t kExpectedNumBlocksToProcess =
+        (kNumFramesToProcess * 160) / kBlockSize;
+    std::unique_ptr<StrictMock<webrtc::test::MockBlockProcessor>>
+        block_processor_mock(
+            new StrictMock<webrtc::test::MockBlockProcessor>());
+    EXPECT_CALL(*block_processor_mock, BufferRender(_))
+        .Times(kExpectedNumBlocksToProcess);
+    EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(_)).Times(0);
+
+    switch (saturation_variant) {
+      case SaturationTestVariant::kNone:
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, false, _, _))
+            .Times(kExpectedNumBlocksToProcess);
+        break;
+      case SaturationTestVariant::kOneNegative: {
+        ::testing::InSequence s;
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, true, _, _))
+            .Times(kNumFullBlocksPerFrame);
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, false, _, _))
+            .Times(kExpectedNumBlocksToProcess - kNumFullBlocksPerFrame);
+      } break;
+      case SaturationTestVariant::kOnePositive: {
+        ::testing::InSequence s;
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, true, _, _))
+            .Times(kNumFullBlocksPerFrame);
+        EXPECT_CALL(*block_processor_mock, ProcessCapture(_, false, _, _))
+            .Times(kExpectedNumBlocksToProcess - kNumFullBlocksPerFrame);
+      } break;
+    }
+
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+    aec3.SetBlockProcessorForTesting(std::move(block_processor_mock));
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      for (int k = 0; k < fullband_frame_length_; ++k) {
+        capture_buffer_.channels()[0][k] = 0.f;
+      }
+      switch (saturation_variant) {
+        case SaturationTestVariant::kNone:
+          break;
+        case SaturationTestVariant::kOneNegative:
+          if (frame_index == 0) {
+            capture_buffer_.channels()[0][10] = -32768.f;
+          }
+          break;
+        case SaturationTestVariant::kOnePositive:
+          if (frame_index == 0) {
+            capture_buffer_.channels()[0][10] = 32767.f;
+          }
+          break;
+      }
+
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &render_buffer_.split_bands(0)[0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+    }
+  }
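The .Times() expectations in these tests follow from simple block bookkeeping with kBlockSize = 64 from aec3_common.h: each 160-sample frame yields two full 64-sample blocks immediately (the remainder is carried over by the FrameBlocker), and 20 frames yield 3200 / 64 = 50 blocks in total, so a flag raised for a single frame is observed by exactly those two blocks. A sketch of the arithmetic:

  // Static checks mirroring the expectations above; values taken from the
  // constants used in this file.
  constexpr int kSketchBlockSize = 64;   // kBlockSize in aec3_common.h.
  constexpr int kSketchNumFrames = 20;   // kNumFramesToProcess.
  static_assert(160 / kSketchBlockSize == 2,
                "two full blocks are extracted per 160-sample frame");
  static_assert(kSketchNumFrames * 160 / kSketchBlockSize == 50,
                "50 blocks are processed over the whole test");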
+
+  // This test verifies that the swap queue is able to handle jitter in the
+  // capture and render API calls.
+  void RunRenderSwapQueueVerificationTest() {
+    const EchoCanceller3Config config;
+    EchoCanceller3 aec3(config, /*multichannel_config=*/absl::nullopt,
+                        sample_rate_hz_, 1, 1);
+    aec3.SetBlockProcessorForTesting(
+        std::make_unique<RenderTransportVerificationProcessor>(num_bands_));
+
+    std::vector<std::vector<float>> render_input(1);
+    std::vector<float> capture_output;
+
+    for (size_t frame_index = 0; frame_index < kRenderTransferQueueSizeFrames;
+         ++frame_index) {
+      if (sample_rate_hz_ > 16000) {
+        render_buffer_.SplitIntoFrequencyBands();
+      }
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &render_buffer_.split_bands(0)[0], 0);
+
+      if (sample_rate_hz_ > 16000) {
+        render_buffer_.SplitIntoFrequencyBands();
+      }
+
+      for (size_t k = 0; k < frame_length_; ++k) {
+        render_input[0].push_back(render_buffer_.split_bands(0)[0][k]);
+      }
+      aec3.AnalyzeRender(&render_buffer_);
+    }
+
+    for (size_t frame_index = 0; frame_index < kRenderTransferQueueSizeFrames;
+         ++frame_index) {
+      aec3.AnalyzeCapture(&capture_buffer_);
+      if (sample_rate_hz_ > 16000) {
+        capture_buffer_.SplitIntoFrequencyBands();
+      }
+
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands(0)[0], 0);
+
+      aec3.ProcessCapture(&capture_buffer_, false);
+      for (size_t k = 0; k < frame_length_; ++k) {
+        capture_output.push_back(capture_buffer_.split_bands(0)[0][k]);
+      }
+    }
+
+    EXPECT_TRUE(
+        VerifyOutputFrameBitexactness(render_input[0], capture_output, -64));
+  }
+
+  // This test verifies that a buffer overrun in the render swap queue is
+  // properly reported.
+  void RunRenderPipelineSwapQueueOverrunReturnValueTest() {
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt, sample_rate_hz_,
+                        1, 1);
+
+    constexpr size_t kRenderTransferQueueSize = 30;
+    for (size_t k = 0; k < 2; ++k) {
+      for (size_t frame_index = 0; frame_index < kRenderTransferQueueSize;
+           ++frame_index) {
+        if (sample_rate_hz_ > 16000) {
+          render_buffer_.SplitIntoFrequencyBands();
+        }
+        PopulateInputFrame(frame_length_, frame_index,
+                           &render_buffer_.channels()[0][0], 0);
+
+        aec3.AnalyzeRender(&render_buffer_);
+      }
+    }
+  }
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+  // Verifies that the check for the number of bands in the AnalyzeRender
+  // input is correct by adjusting the sample rates of EchoCanceller3 and the
+  // input AudioBuffer to have a different number of bands.
+  void RunAnalyzeRenderNumBandsCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the number of bands for the rates are different.
+    const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt,
+                        aec3_sample_rate_hz, 1, 1);
+    PopulateInputFrame(frame_length_, 0, &render_buffer_.channels_f()[0][0],
+                       0);
+
+    EXPECT_DEATH(aec3.AnalyzeRender(&render_buffer_), "");
+  }
+
+  // Verifies that the check for the number of bands in the ProcessCapture
+  // input is correct by adjusting the sample rates of EchoCanceller3 and the
+  // input AudioBuffer to have a different number of bands.
+  void RunProcessCaptureNumBandsCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the number of bands for the rates are different.
+    const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
+    EchoCanceller3 aec3(EchoCanceller3Config(),
+                        /*multichannel_config=*/absl::nullopt,
+                        aec3_sample_rate_hz, 1, 1);
+    PopulateInputFrame(frame_length_, num_bands_, 0,
+                       &capture_buffer_.split_bands_f(0)[0], 100);
+    EXPECT_DEATH(aec3.ProcessCapture(&capture_buffer_, false), "");
+  }
+
+#endif
+
+ private:
+  void OptionalBandSplit() {
+    if (sample_rate_hz_ > 16000) {
+      capture_buffer_.SplitIntoFrequencyBands();
+      render_buffer_.SplitIntoFrequencyBands();
+    }
+  }
+
+  static constexpr size_t kNumFramesToProcess = 20;
+  const int sample_rate_hz_;
+  const size_t num_bands_;
+  const size_t frame_length_;
+  const int fullband_frame_length_;
+  AudioBuffer capture_buffer_;
+  AudioBuffer render_buffer_;
+};
+
+TEST(EchoCanceller3Buffering, CaptureBitexactness) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunCaptureTransportVerificationTest();
+  }
+}
+
+TEST(EchoCanceller3Buffering, RenderBitexactness) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunRenderTransportVerificationTest();
+  }
+}
+
+TEST(EchoCanceller3Buffering, RenderSwapQueue) {
+  EchoCanceller3Tester(16000).RunRenderSwapQueueVerificationTest();
+}
+
+TEST(EchoCanceller3Buffering, RenderSwapQueueOverrunReturnValue) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate)
+        .RunRenderPipelineSwapQueueOverrunReturnValueTest();
+  }
+}
+
+TEST(EchoCanceller3Messaging, CaptureSaturation) {
+  auto variants = {EchoCanceller3Tester::SaturationTestVariant::kNone,
+                   EchoCanceller3Tester::SaturationTestVariant::kOneNegative,
+                   EchoCanceller3Tester::SaturationTestVariant::kOnePositive};
+  for (auto rate : {16000, 32000, 48000}) {
+    for (auto variant : variants) {
+      SCOPED_TRACE(ProduceDebugText(rate, static_cast<int>(variant)));
+      EchoCanceller3Tester(rate).RunCaptureSaturationVerificationTest(variant);
+    }
+  }
+}
+
+TEST(EchoCanceller3Messaging, EchoPathChange) {
+  auto variants = {
+      EchoCanceller3Tester::EchoPathChangeTestVariant::kNone,
+      EchoCanceller3Tester::EchoPathChangeTestVariant::kOneSticky,
+      EchoCanceller3Tester::EchoPathChangeTestVariant::kOneNonSticky};
+  for (auto rate : {16000, 32000, 48000}) {
+    for (auto variant : variants) {
+      SCOPED_TRACE(ProduceDebugText(rate, static_cast<int>(variant)));
+      EchoCanceller3Tester(rate).RunEchoPathChangeVerificationTest(variant);
+    }
+  }
+}
+
+TEST(EchoCanceller3Messaging, EchoLeakage) {
+  auto variants = {
+      EchoCanceller3Tester::EchoLeakageTestVariant::kNone,
+      EchoCanceller3Tester::EchoLeakageTestVariant::kFalseSticky,
+      EchoCanceller3Tester::EchoLeakageTestVariant::kTrueSticky,
+      EchoCanceller3Tester::EchoLeakageTestVariant::kTrueNonSticky};
+  for (auto rate : {16000, 32000, 48000}) {
+    for (auto variant : variants) {
+      SCOPED_TRACE(ProduceDebugText(rate, static_cast<int>(variant)));
+      EchoCanceller3Tester(rate).RunEchoLeakageVerificationTest(variant);
+    }
+  }
+}
+
+// Tests the parameter functionality for the field trial override for the
+// anti-howling gain.
+TEST(EchoCanceller3FieldTrials, Aec3SuppressorAntiHowlingGainOverride) {
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+  ASSERT_EQ(
+      default_config.suppressor.high_bands_suppression.anti_howling_gain,
+      adjusted_config.suppressor.high_bands_suppression.anti_howling_gain);
+
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3SuppressorAntiHowlingGainOverride/0.02/");
+  adjusted_config = AdjustConfig(default_config);
+
+  ASSERT_NE(
+      default_config.suppressor.high_bands_suppression.anti_howling_gain,
+      adjusted_config.suppressor.high_bands_suppression.anti_howling_gain);
+  EXPECT_FLOAT_EQ(
+      0.02f,
+      adjusted_config.suppressor.high_bands_suppression.anti_howling_gain);
+}
+
+// Tests the field trial override for the enforcement of a low active render
+// limit.
+TEST(EchoCanceller3FieldTrials, Aec3EnforceLowActiveRenderLimit) {
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+  ASSERT_EQ(default_config.render_levels.active_render_limit,
+            adjusted_config.render_levels.active_render_limit);
+
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3EnforceLowActiveRenderLimit/Enabled/");
+  adjusted_config = AdjustConfig(default_config);
+
+  ASSERT_NE(default_config.render_levels.active_render_limit,
+            adjusted_config.render_levels.active_render_limit);
+  EXPECT_FLOAT_EQ(50.f, adjusted_config.render_levels.active_render_limit);
+}
+
+// Tests the field trial-based override of the suppressor parameters when all
+// parameters are passed jointly.
+TEST(EchoCanceller3FieldTrials, Aec3SuppressorTuningOverrideAllParams) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3SuppressorTuningOverride/"
+      "nearend_tuning_mask_lf_enr_transparent:0.1,nearend_tuning_mask_lf_enr_"
+      "suppress:0.2,nearend_tuning_mask_hf_enr_transparent:0.3,nearend_tuning_"
+      "mask_hf_enr_suppress:0.4,nearend_tuning_max_inc_factor:0.5,nearend_"
+      "tuning_max_dec_factor_lf:0.6,normal_tuning_mask_lf_enr_transparent:0.7,"
+      "normal_tuning_mask_lf_enr_suppress:0.8,normal_tuning_mask_hf_enr_"
+      "transparent:0.9,normal_tuning_mask_hf_enr_suppress:1.0,normal_tuning_"
+      "max_inc_factor:1.1,normal_tuning_max_dec_factor_lf:1.2,dominant_nearend_"
+      "detection_enr_threshold:1.3,dominant_nearend_detection_enr_exit_"
+      "threshold:1.4,dominant_nearend_detection_snr_threshold:1.5,dominant_"
+      "nearend_detection_hold_duration:10,dominant_nearend_detection_trigger_"
+      "threshold:11/");
+
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.mask_lf.enr_transparent,
+            default_config.suppressor.nearend_tuning.mask_lf.enr_transparent);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.mask_lf.enr_suppress,
+            default_config.suppressor.nearend_tuning.mask_lf.enr_suppress);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.mask_hf.enr_transparent,
+            default_config.suppressor.nearend_tuning.mask_hf.enr_transparent);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.mask_hf.enr_suppress,
+            default_config.suppressor.nearend_tuning.mask_hf.enr_suppress);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.max_inc_factor,
+            default_config.suppressor.nearend_tuning.max_inc_factor);
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.max_dec_factor_lf,
+            default_config.suppressor.nearend_tuning.max_dec_factor_lf);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.mask_lf.enr_transparent,
+            default_config.suppressor.normal_tuning.mask_lf.enr_transparent);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.mask_lf.enr_suppress,
+            default_config.suppressor.normal_tuning.mask_lf.enr_suppress);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.mask_hf.enr_transparent,
+            default_config.suppressor.normal_tuning.mask_hf.enr_transparent);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.mask_hf.enr_suppress,
+            default_config.suppressor.normal_tuning.mask_hf.enr_suppress);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.max_inc_factor,
+            default_config.suppressor.normal_tuning.max_inc_factor);
+  ASSERT_NE(adjusted_config.suppressor.normal_tuning.max_dec_factor_lf,
+            default_config.suppressor.normal_tuning.max_dec_factor_lf);
+  ASSERT_NE(adjusted_config.suppressor.dominant_nearend_detection.enr_threshold,
+            default_config.suppressor.dominant_nearend_detection.enr_threshold);
+  ASSERT_NE(
+      adjusted_config.suppressor.dominant_nearend_detection.enr_exit_threshold,
+      default_config.suppressor.dominant_nearend_detection.enr_exit_threshold);
+  ASSERT_NE(adjusted_config.suppressor.dominant_nearend_detection.snr_threshold,
+            default_config.suppressor.dominant_nearend_detection.snr_threshold);
+  ASSERT_NE(adjusted_config.suppressor.dominant_nearend_detection.hold_duration,
+            default_config.suppressor.dominant_nearend_detection.hold_duration);
+  ASSERT_NE(
+      adjusted_config.suppressor.dominant_nearend_detection.trigger_threshold,
+      default_config.suppressor.dominant_nearend_detection.trigger_threshold);
+
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.nearend_tuning.mask_lf.enr_transparent, 0.1);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.nearend_tuning.mask_lf.enr_suppress, 0.2);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.nearend_tuning.mask_hf.enr_transparent, 0.3);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.nearend_tuning.mask_hf.enr_suppress, 0.4);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.nearend_tuning.max_inc_factor,
+                  0.5);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.nearend_tuning.max_dec_factor_lf,
+                  0.6);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.normal_tuning.mask_lf.enr_transparent, 0.7);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.normal_tuning.mask_lf.enr_suppress,
+                  0.8);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.normal_tuning.mask_hf.enr_transparent, 0.9);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.normal_tuning.mask_hf.enr_suppress,
+                  1.0);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.normal_tuning.max_inc_factor, 1.1);
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.normal_tuning.max_dec_factor_lf,
+                  1.2);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.enr_threshold, 1.3);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.enr_exit_threshold,
+      1.4);
+  EXPECT_FLOAT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.snr_threshold, 1.5);
+  EXPECT_EQ(adjusted_config.suppressor.dominant_nearend_detection.hold_duration,
+            10);
+  EXPECT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.trigger_threshold,
+      11);
+}
+
+// Tests the field trial-based override of the suppressor parameters when only
+// a single parameter is passed.
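+// Only nearend_tuning_max_inc_factor is overridden; every other suppressor
+// parameter is expected to keep its default value.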
+TEST(EchoCanceller3FieldTrials, Aec3SuppressorTuningOverrideOneParam) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3SuppressorTuningOverride/nearend_tuning_max_inc_factor:0.5/");
+
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.mask_lf.enr_transparent,
+            default_config.suppressor.nearend_tuning.mask_lf.enr_transparent);
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.mask_lf.enr_suppress,
+            default_config.suppressor.nearend_tuning.mask_lf.enr_suppress);
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.mask_hf.enr_transparent,
+            default_config.suppressor.nearend_tuning.mask_hf.enr_transparent);
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.mask_hf.enr_suppress,
+            default_config.suppressor.nearend_tuning.mask_hf.enr_suppress);
+  ASSERT_EQ(adjusted_config.suppressor.nearend_tuning.max_dec_factor_lf,
+            default_config.suppressor.nearend_tuning.max_dec_factor_lf);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.mask_lf.enr_transparent,
+            default_config.suppressor.normal_tuning.mask_lf.enr_transparent);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.mask_lf.enr_suppress,
+            default_config.suppressor.normal_tuning.mask_lf.enr_suppress);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.mask_hf.enr_transparent,
+            default_config.suppressor.normal_tuning.mask_hf.enr_transparent);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.mask_hf.enr_suppress,
+            default_config.suppressor.normal_tuning.mask_hf.enr_suppress);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.max_inc_factor,
+            default_config.suppressor.normal_tuning.max_inc_factor);
+  ASSERT_EQ(adjusted_config.suppressor.normal_tuning.max_dec_factor_lf,
+            default_config.suppressor.normal_tuning.max_dec_factor_lf);
+  ASSERT_EQ(adjusted_config.suppressor.dominant_nearend_detection.enr_threshold,
+            default_config.suppressor.dominant_nearend_detection.enr_threshold);
+  ASSERT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.enr_exit_threshold,
+      default_config.suppressor.dominant_nearend_detection.enr_exit_threshold);
+  ASSERT_EQ(adjusted_config.suppressor.dominant_nearend_detection.snr_threshold,
+            default_config.suppressor.dominant_nearend_detection.snr_threshold);
+  ASSERT_EQ(adjusted_config.suppressor.dominant_nearend_detection.hold_duration,
+            default_config.suppressor.dominant_nearend_detection.hold_duration);
+  ASSERT_EQ(
+      adjusted_config.suppressor.dominant_nearend_detection.trigger_threshold,
+      default_config.suppressor.dominant_nearend_detection.trigger_threshold);
+
+  ASSERT_NE(adjusted_config.suppressor.nearend_tuning.max_inc_factor,
+            default_config.suppressor.nearend_tuning.max_inc_factor);
+
+  EXPECT_FLOAT_EQ(adjusted_config.suppressor.nearend_tuning.max_inc_factor,
+                  0.5);
+}
+
+// Tests the field trial that overrides the exponential decay parameters.
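+// The default_len and nearend_len values are passed via the field trial
+// string "WebRTC-Aec3UseNearendReverbLen/default_len:...,nearend_len:.../".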
+TEST(EchoCanceller3FieldTrials, Aec3UseNearendReverb) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Aec3UseNearendReverbLen/default_len:0.9,nearend_len:0.8/");
+  EchoCanceller3Config default_config;
+  EchoCanceller3Config adjusted_config = AdjustConfig(default_config);
+  EXPECT_FLOAT_EQ(adjusted_config.ep_strength.default_len, 0.9);
+  EXPECT_FLOAT_EQ(adjusted_config.ep_strength.nearend_len, 0.8);
+}
+
+TEST(EchoCanceller3, DetectionOfProperStereo) {
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 2;
+  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+                     /*input_num_channels=*/kNumChannels,
+                     /*buffer_rate=*/kSampleRateHz,
+                     /*buffer_num_channels=*/kNumChannels,
+                     /*output_rate=*/kSampleRateHz,
+                     /*output_num_channels=*/kNumChannels);
+
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> multichannel_config;
+
+  mono_config.multi_channel.detect_stereo_content = true;
+  mono_config.multi_channel.stereo_detection_threshold = 0.0f;
+  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
+  multichannel_config = mono_config;
+  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+  multichannel_config->filter.coarse_initial.length_blocks =
+      kNumBlocksForSurroundConfig;
+
+  EchoCanceller3 aec3(mono_config, multichannel_config,
+                      /*sample_rate_hz=*/kSampleRateHz,
+                      /*num_render_channels=*/kNumChannels,
+                      /*num_capture_input_channels=*/kNumChannels);
+
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForSurroundConfig);
+}
+
+TEST(EchoCanceller3, DetectionOfProperStereoUsingThreshold) {
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 2;
+  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+                     /*input_num_channels=*/kNumChannels,
+                     /*buffer_rate=*/kSampleRateHz,
+                     /*buffer_num_channels=*/kNumChannels,
+                     /*output_rate=*/kSampleRateHz,
+                     /*output_num_channels=*/kNumChannels);
+
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> multichannel_config;
+
+  constexpr float kStereoDetectionThreshold = 2.0f;
+  mono_config.multi_channel.detect_stereo_content = true;
+  mono_config.multi_channel.stereo_detection_threshold =
+      kStereoDetectionThreshold;
+  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.0f;
+  multichannel_config = mono_config;
+  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+  multichannel_config->filter.coarse_initial.length_blocks =
+      kNumBlocksForSurroundConfig;
+
+  EchoCanceller3 aec3(mono_config, multichannel_config,
+                      /*sample_rate_hz=*/kSampleRateHz,
+                      /*num_render_channels=*/kNumChannels,
+                      /*num_capture_input_channels=*/kNumChannels);
+
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f,
+                 100.0f + kStereoDetectionThreshold - 1.0f);
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f,
+                 100.0f + kStereoDetectionThreshold + 10.0f);
+  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForSurroundConfig);
+}
+
+TEST(EchoCanceller3, DetectionOfProperStereoUsingHysteresis) {
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 2;
+  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+                     /*input_num_channels=*/kNumChannels,
+                     /*buffer_rate=*/kSampleRateHz,
+                     /*buffer_num_channels=*/kNumChannels,
+                     /*output_rate=*/kSampleRateHz,
+                     /*output_num_channels=*/kNumChannels);
+
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> surround_config;
+
+  mono_config.multi_channel.detect_stereo_content = true;
+  mono_config.multi_channel.stereo_detection_hysteresis_seconds = 0.5f;
+  surround_config = mono_config;
+  mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+  surround_config->filter.coarse_initial.length_blocks =
+      kNumBlocksForSurroundConfig;
+
+  EchoCanceller3 aec3(mono_config, surround_config,
+                      /*sample_rate_hz=*/kSampleRateHz,
+                      /*num_render_channels=*/kNumChannels,
+                      /*num_capture_input_channels=*/kNumChannels);
+
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  RunAecInStereo(buffer, aec3, 100.0f, 100.0f);
+  EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForMonoConfig);
+
+  constexpr int kNumFramesPerSecond = 100;
+  for (int k = 0;
+       k < static_cast<int>(
+               kNumFramesPerSecond *
+               mono_config.multi_channel.stereo_detection_hysteresis_seconds);
+       ++k) {
+    RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+    EXPECT_EQ(
+        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+        kNumBlocksForMonoConfig);
+  }
+
+  RunAecInStereo(buffer, aec3, 100.0f, 101.0f);
+  EXPECT_TRUE(aec3.StereoRenderProcessingActiveForTesting());
+  EXPECT_EQ(
+      aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+      kNumBlocksForSurroundConfig);
+}
+
+TEST(EchoCanceller3, StereoContentDetectionForMonoSignals) {
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 2;
+  AudioBuffer buffer(/*input_rate=*/kSampleRateHz,
+                     /*input_num_channels=*/kNumChannels,
+                     /*buffer_rate=*/kSampleRateHz,
+                     /*buffer_num_channels=*/kNumChannels,
+                     /*output_rate=*/kSampleRateHz,
+                     /*output_num_channels=*/kNumChannels);
+
+  constexpr size_t kNumBlocksForMonoConfig = 1;
+  constexpr size_t kNumBlocksForSurroundConfig = 2;
+  EchoCanceller3Config mono_config;
+  absl::optional<EchoCanceller3Config> multichannel_config;
+
+  for (bool detect_stereo_content : {false, true}) {
+    mono_config.multi_channel.detect_stereo_content = detect_stereo_content;
+    multichannel_config = mono_config;
+    mono_config.filter.coarse_initial.length_blocks = kNumBlocksForMonoConfig;
+    multichannel_config->filter.coarse_initial.length_blocks =
+        kNumBlocksForSurroundConfig;
+
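+    // Feed a mono render signal; regardless of whether stereo content
+    // detection is enabled, the mono configuration must remain active.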
+    AudioBuffer mono_buffer(/*input_rate=*/kSampleRateHz,
+                            /*input_num_channels=*/1,
+                            /*buffer_rate=*/kSampleRateHz,
+                            /*buffer_num_channels=*/1,
+                            /*output_rate=*/kSampleRateHz,
+                            /*output_num_channels=*/1);
+
+    EchoCanceller3 aec3(mono_config, multichannel_config,
+                        /*sample_rate_hz=*/kSampleRateHz,
+                        /*num_render_channels=*/1,
+                        /*num_capture_input_channels=*/1);
+
+    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+    EXPECT_EQ(
+        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+        kNumBlocksForMonoConfig);
+
+    RunAecInSMono(mono_buffer, aec3, 100.0f);
+    EXPECT_FALSE(aec3.StereoRenderProcessingActiveForTesting());
+    EXPECT_EQ(
+        aec3.GetActiveConfigForTesting().filter.coarse_initial.length_blocks,
+        kNumBlocksForMonoConfig);
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+TEST(EchoCanceller3InputCheckDeathTest, WrongCaptureNumBandsCheckVerification) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunProcessCaptureNumBandsCheckVerification();
+  }
+}
+
+// Verifies that the check for null input to the capture processing API call
+// works.
+TEST(EchoCanceller3InputCheckDeathTest, NullCaptureProcessingParameter) {
+  EXPECT_DEATH(
+      EchoCanceller3(EchoCanceller3Config(),
+                     /*multichannel_config=*/absl::nullopt, 16000, 1, 1)
+          .ProcessCapture(nullptr, false),
+      "");
+}
+
+// Verifies the check for correct sample rate.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(EchoCanceller3InputCheckDeathTest, DISABLED_WrongSampleRate) {
+  ApmDataDumper data_dumper(0);
+  EXPECT_DEATH(
+      EchoCanceller3(EchoCanceller3Config(),
+                     /*multichannel_config=*/absl::nullopt, 8001, 1, 1),
+      "");
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc
new file mode 100644
index 0000000000..510e4b8a8d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
+
+#include <array>
+
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+EchoPathDelayEstimator::EchoPathDelayEstimator(
+    ApmDataDumper* data_dumper,
+    const EchoCanceller3Config& config,
+    size_t num_capture_channels)
+    : data_dumper_(data_dumper),
+      down_sampling_factor_(config.delay.down_sampling_factor),
+      sub_block_size_(down_sampling_factor_ != 0
+                          ? kBlockSize / down_sampling_factor_
+                          : kBlockSize),
+      capture_mixer_(num_capture_channels,
+                     config.delay.capture_alignment_mixing),
+      capture_decimator_(down_sampling_factor_),
+      matched_filter_(
+          data_dumper_,
+          DetectOptimization(),
+          sub_block_size_,
+          kMatchedFilterWindowSizeSubBlocks,
+          config.delay.num_filters,
+          kMatchedFilterAlignmentShiftSizeSubBlocks,
+          config.delay.down_sampling_factor == 8
+              ? config.render_levels.poor_excitation_render_limit_ds8
+              : config.render_levels.poor_excitation_render_limit,
+          config.delay.delay_estimate_smoothing,
+          config.delay.delay_estimate_smoothing_delay_found,
+          config.delay.delay_candidate_detection_threshold,
+          config.delay.detect_pre_echo),
+      matched_filter_lag_aggregator_(data_dumper_,
+                                     matched_filter_.GetMaxFilterLag(),
+                                     config.delay) {
+  RTC_DCHECK(data_dumper);
+  RTC_DCHECK(down_sampling_factor_ > 0);
+}
+
+EchoPathDelayEstimator::~EchoPathDelayEstimator() = default;
+
+void EchoPathDelayEstimator::Reset(bool reset_delay_confidence) {
+  Reset(true, reset_delay_confidence);
+}
+
+absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
+    const DownsampledRenderBuffer& render_buffer,
+    const Block& capture) {
+  std::array<float, kBlockSize> downsampled_capture_data;
+  rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(),
+                                            sub_block_size_);
+
+  std::array<float, kBlockSize> downmixed_capture;
+  capture_mixer_.ProduceOutput(capture, downmixed_capture);
+  capture_decimator_.Decimate(downmixed_capture, downsampled_capture);
+  data_dumper_->DumpWav("aec3_capture_decimator_output",
+                        downsampled_capture.size(), downsampled_capture.data(),
+                        16000 / down_sampling_factor_, 1);
+  matched_filter_.Update(render_buffer, downsampled_capture,
+                         matched_filter_lag_aggregator_.ReliableDelayFound());
+
+  absl::optional<DelayEstimate> aggregated_matched_filter_lag =
+      matched_filter_lag_aggregator_.Aggregate(
+          matched_filter_.GetBestLagEstimate());
+
+  // Run clockdrift detection.
+  if (aggregated_matched_filter_lag &&
+      (*aggregated_matched_filter_lag).quality ==
+          DelayEstimate::Quality::kRefined)
+    clockdrift_detector_.Update(
+        matched_filter_lag_aggregator_.GetDelayAtHighestPeak());
+
+  // TODO(peah): Move this logging outside of this class once EchoCanceller3
+  // development is done.
+  data_dumper_->DumpRaw(
+      "aec3_echo_path_delay_estimator_delay",
+      aggregated_matched_filter_lag
+          ? static_cast<int>(aggregated_matched_filter_lag->delay *
+                             down_sampling_factor_)
+          : -1);
+
+  // Return the detected delay in samples as the aggregated matched filter lag
+  // compensated by the down sampling factor for the signal being correlated.
+  if (aggregated_matched_filter_lag) {
+    aggregated_matched_filter_lag->delay *= down_sampling_factor_;
+  }
+
+  if (old_aggregated_lag_ && aggregated_matched_filter_lag &&
+      old_aggregated_lag_->delay == aggregated_matched_filter_lag->delay) {
+    ++consistent_estimate_counter_;
+  } else {
+    consistent_estimate_counter_ = 0;
+  }
+  old_aggregated_lag_ = aggregated_matched_filter_lag;
+  constexpr size_t kNumBlocksPerSecondBy2 = kNumBlocksPerSecond / 2;
+  if (consistent_estimate_counter_ > kNumBlocksPerSecondBy2) {
+    Reset(false, false);
+  }
+
+  return aggregated_matched_filter_lag;
+}
+
+void EchoPathDelayEstimator::Reset(bool reset_lag_aggregator,
+                                   bool reset_delay_confidence) {
+  if (reset_lag_aggregator) {
+    matched_filter_lag_aggregator_.Reset(reset_delay_confidence);
+  }
+  matched_filter_.Reset(/*full_reset=*/reset_lag_aggregator);
+  old_aggregated_lag_ = absl::nullopt;
+  consistent_estimate_counter_ = 0;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h
new file mode 100644
index 0000000000..b24d0a29ec
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
+
+#include <stddef.h>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/alignment_mixer.h"
+#include "modules/audio_processing/aec3/block.h"
+#include "modules/audio_processing/aec3/clockdrift_detector.h"
+#include "modules/audio_processing/aec3/decimator.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/matched_filter.h"
+#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+struct DownsampledRenderBuffer;
+struct EchoCanceller3Config;
+
+// Estimates the delay of the echo path.
+class EchoPathDelayEstimator {
+ public:
+  EchoPathDelayEstimator(ApmDataDumper* data_dumper,
+                         const EchoCanceller3Config& config,
+                         size_t num_capture_channels);
+  ~EchoPathDelayEstimator();
+
+  EchoPathDelayEstimator(const EchoPathDelayEstimator&) = delete;
+  EchoPathDelayEstimator& operator=(const EchoPathDelayEstimator&) = delete;
+
+  // Resets the estimation. If the delay confidence is reset, the reset
+  // behavior is as if the call is restarted.
+  void Reset(bool reset_delay_confidence);
+
+  // Produces a delay estimate if one is available.
+  absl::optional<DelayEstimate> EstimateDelay(
+      const DownsampledRenderBuffer& render_buffer,
+      const Block& capture);
+
+  // Log delay estimator properties.
+  void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const {
+    matched_filter_.LogFilterProperties(sample_rate_hz, shift,
+                                        down_sampling_factor_);
+  }
+
+  // Returns the level of detected clockdrift.
+  ClockdriftDetector::Level Clockdrift() const {
+    return clockdrift_detector_.ClockdriftLevel();
+  }
+
+ private:
+  ApmDataDumper* const data_dumper_;
+  const size_t down_sampling_factor_;
+  const size_t sub_block_size_;
+  AlignmentMixer capture_mixer_;
+  Decimator capture_decimator_;
+  MatchedFilter matched_filter_;
+  MatchedFilterLagAggregator matched_filter_lag_aggregator_;
+  absl::optional<DelayEstimate> old_aggregated_lag_;
+  size_t consistent_estimate_counter_ = 0;
+  ClockdriftDetector clockdrift_detector_;
+
+  // Internal reset method with more granularity.
+  void Reset(bool reset_lag_aggregator, bool reset_delay_confidence);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc
new file mode 100644
index 0000000000..e2c101fb04
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
+
+#include <memory>
+#include <string>
+
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+std::string ProduceDebugText(size_t delay, size_t down_sampling_factor) {
+  rtc::StringBuilder ss;
+  ss << "Delay: " << delay;
+  ss << ", Down sampling factor: " << down_sampling_factor;
+  return ss.Release();
+}
+
+}  // namespace
+
+class EchoPathDelayEstimatorMultiChannel
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {};
+
+INSTANTIATE_TEST_SUITE_P(MultiChannel,
+                         EchoPathDelayEstimatorMultiChannel,
+                         ::testing::Combine(::testing::Values(1, 2, 3, 6, 8),
+                                            ::testing::Values(1, 2, 4)));
+
+// Verifies that the basic API calls work.
+TEST_P(EchoPathDelayEstimatorMultiChannel, BasicApiCalls) {
+  const size_t num_render_channels = std::get<0>(GetParam());
+  const size_t num_capture_channels = std::get<1>(GetParam());
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  ApmDataDumper data_dumper(0);
+  EchoCanceller3Config config;
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
+  EchoPathDelayEstimator estimator(&data_dumper, config, num_capture_channels);
+  Block render(kNumBands, num_render_channels);
+  Block capture(/*num_bands=*/1, num_capture_channels);
+  for (size_t k = 0; k < 100; ++k) {
+    render_delay_buffer->Insert(render);
+    estimator.EstimateDelay(render_delay_buffer->GetDownsampledRenderBuffer(),
+                            capture);
+  }
+}
+
+// Verifies that the delay estimator produces correct delay for artificially
+// delayed signals.
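+// The render signal is random noise and the capture signal is a delayed copy
+// of it, so the estimator is expected to converge to the inserted delay.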
+TEST(EchoPathDelayEstimator, DelayEstimation) {
+  constexpr size_t kNumRenderChannels = 1;
+  constexpr size_t kNumCaptureChannels = 1;
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  Random random_generator(42U);
+  Block render(kNumBands, kNumRenderChannels);
+  Block capture(/*num_bands=*/1, kNumCaptureChannels);
+  ApmDataDumper data_dumper(0);
+  constexpr size_t kDownSamplingFactors[] = {2, 4, 8};
+  for (auto down_sampling_factor : kDownSamplingFactors) {
+    EchoCanceller3Config config;
+    config.delay.delay_headroom_samples = 0;
+    config.delay.down_sampling_factor = down_sampling_factor;
+    config.delay.num_filters = 10;
+    for (size_t delay_samples : {30, 64, 150, 200, 800, 4000}) {
+      SCOPED_TRACE(ProduceDebugText(delay_samples, down_sampling_factor));
+      std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+          RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels));
+      DelayBuffer<float> signal_delay_buffer(delay_samples);
+      EchoPathDelayEstimator estimator(&data_dumper, config,
+                                       kNumCaptureChannels);
+
+      absl::optional<DelayEstimate> estimated_delay_samples;
+      for (size_t k = 0; k < (500 + (delay_samples) / kBlockSize); ++k) {
+        RandomizeSampleVector(&random_generator,
+                              render.View(/*band=*/0, /*channel=*/0));
+        signal_delay_buffer.Delay(render.View(/*band=*/0, /*channel=*/0),
+                                  capture.View(/*band=*/0, /*channel=*/0));
+        render_delay_buffer->Insert(render);
+
+        if (k == 0) {
+          render_delay_buffer->Reset();
+        }
+
+        render_delay_buffer->PrepareCaptureProcessing();
+
+        auto estimate = estimator.EstimateDelay(
+            render_delay_buffer->GetDownsampledRenderBuffer(), capture);
+
+        if (estimate) {
+          estimated_delay_samples = estimate;
+        }
+      }
+
+      if (estimated_delay_samples) {
+        // Allow estimated delay to be off by a block as internally the delay is
+        // quantized with an error up to a block.
+        size_t delay_ds = delay_samples / down_sampling_factor;
+        size_t estimated_delay_ds =
+            estimated_delay_samples->delay / down_sampling_factor;
+        EXPECT_NEAR(delay_ds, estimated_delay_ds,
+                    kBlockSize / down_sampling_factor);
+      } else {
+        ADD_FAILURE();
+      }
+    }
+  }
+}
+
+// Verifies that the delay estimator does not produce delay estimates for
+// low-level render signals.
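+// The render signal is scaled down to a low amplitude before being inserted,
+// which should prevent any delay estimate from being produced.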
+TEST(EchoPathDelayEstimator, NoDelayEstimatesForLowLevelRenderSignals) {
+  constexpr size_t kNumRenderChannels = 1;
+  constexpr size_t kNumCaptureChannels = 1;
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  Random random_generator(42U);
+  EchoCanceller3Config config;
+  Block render(kNumBands, kNumRenderChannels);
+  Block capture(/*num_bands=*/1, kNumCaptureChannels);
+  ApmDataDumper data_dumper(0);
+  EchoPathDelayEstimator estimator(&data_dumper, config, kNumCaptureChannels);
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz,
+                                kNumRenderChannels));
+  for (size_t k = 0; k < 100; ++k) {
+    RandomizeSampleVector(&random_generator,
+                          render.View(/*band=*/0, /*channel=*/0));
+    for (auto& render_k : render.View(/*band=*/0, /*channel=*/0)) {
+      render_k *= 100.f / 32767.f;
+    }
+    std::copy(render.begin(/*band=*/0, /*channel=*/0),
+              render.end(/*band=*/0, /*channel=*/0),
+              capture.begin(/*band=*/0, /*channel=*/0));
+    render_delay_buffer->Insert(render);
+    render_delay_buffer->PrepareCaptureProcessing();
+    EXPECT_FALSE(estimator.EstimateDelay(
+        render_delay_buffer->GetDownsampledRenderBuffer(), capture));
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for the render block size.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(EchoPathDelayEstimatorDeathTest, DISABLED_WrongRenderBlockSize) {
+  ApmDataDumper data_dumper(0);
+  EchoCanceller3Config config;
+  EchoPathDelayEstimator estimator(&data_dumper, config, 1);
+  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+      RenderDelayBuffer::Create(config, 48000, 1));
+  Block capture(/*num_bands=*/1, /*num_channels=*/1);
+  EXPECT_DEATH(estimator.EstimateDelay(
+                   render_delay_buffer->GetDownsampledRenderBuffer(), capture),
+               "");
+}
+
+// Verifies the check for non-null data dumper.
+TEST(EchoPathDelayEstimatorDeathTest, NullDataDumper) {
+  EXPECT_DEATH(EchoPathDelayEstimator(nullptr, EchoCanceller3Config(), 1), "");
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc
new file mode 100644
index 0000000000..0ae9cff98e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.cc
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+
+namespace webrtc {
+
+EchoPathVariability::EchoPathVariability(bool gain_change,
+                                         DelayAdjustment delay_change,
+                                         bool clock_drift)
+    : gain_change(gain_change),
+      delay_change(delay_change),
+      clock_drift(clock_drift) {}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.h
new file mode 100644
index 0000000000..78e4f64b2b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
+
+namespace webrtc {
+
+struct EchoPathVariability {
+  enum class DelayAdjustment {
+    kNone,
+    kBufferFlush,
+    kNewDetectedDelay
+  };
+
+  EchoPathVariability(bool gain_change,
+                      DelayAdjustment delay_change,
+                      bool clock_drift);
+
+  bool AudioPathChanged() const {
+    return gain_change || delay_change != DelayAdjustment::kNone;
+  }
+  bool gain_change;
+  DelayAdjustment delay_change;
+  bool clock_drift;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc
new file mode 100644
index 0000000000..0f10f95f72
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(EchoPathVariability, CorrectBehavior) {
+  // Test correct passing and reporting of the gain change information.
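+  // All four combinations of gain change and delay adjustment are exercised;
+  // clock drift is kept false throughout.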
+  EchoPathVariability v(
+      true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false);
+  EXPECT_TRUE(v.gain_change);
+  EXPECT_TRUE(v.delay_change ==
+              EchoPathVariability::DelayAdjustment::kNewDetectedDelay);
+  EXPECT_TRUE(v.AudioPathChanged());
+  EXPECT_FALSE(v.clock_drift);
+
+  v = EchoPathVariability(true, EchoPathVariability::DelayAdjustment::kNone,
+                          false);
+  EXPECT_TRUE(v.gain_change);
+  EXPECT_TRUE(v.delay_change == EchoPathVariability::DelayAdjustment::kNone);
+  EXPECT_TRUE(v.AudioPathChanged());
+  EXPECT_FALSE(v.clock_drift);
+
+  v = EchoPathVariability(
+      false, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false);
+  EXPECT_FALSE(v.gain_change);
+  EXPECT_TRUE(v.delay_change ==
+              EchoPathVariability::DelayAdjustment::kNewDetectedDelay);
+  EXPECT_TRUE(v.AudioPathChanged());
+  EXPECT_FALSE(v.clock_drift);
+
+  v = EchoPathVariability(false, EchoPathVariability::DelayAdjustment::kNone,
+                          false);
+  EXPECT_FALSE(v.gain_change);
+  EXPECT_TRUE(v.delay_change == EchoPathVariability::DelayAdjustment::kNone);
+  EXPECT_FALSE(v.AudioPathChanged());
+  EXPECT_FALSE(v.clock_drift);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc
new file mode 100644
index 0000000000..673d88af03
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.cc
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/aec3/echo_remover.h"
+
+#include <math.h>
+#include <stddef.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <memory>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/comfort_noise_generator.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/echo_remover_metrics.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+#include "modules/audio_processing/aec3/residual_echo_estimator.h"
+#include "modules/audio_processing/aec3/subtractor.h"
+#include "modules/audio_processing/aec3/subtractor_output.h"
+#include "modules/audio_processing/aec3/suppression_filter.h"
+#include "modules/audio_processing/aec3/suppression_gain.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+namespace {
+
+// Maximum number of channels for which the capture channel data is stored on
+// the stack. If the number of channels is larger than this, the data is stored
+// using scratch memory that is pre-allocated on the heap. The reason for this
+// partitioning is to avoid wasting heap space for the more common channel
+// counts, while at the same time not limiting the support for higher numbers
+// of channels by forcing the capture channel data onto the stack via a fixed
+// maximum value.
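+// With the limit set to two channels below, mono and stereo capture are
+// handled without resorting to heap allocation.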
+constexpr size_t kMaxNumChannelsOnStack = 2;
+
+// Chooses the number of channels to store on the heap when that is required
+// because the number of capture channels is larger than the pre-defined number
+// of channels to store on the stack.
+size_t NumChannelsOnHeap(size_t num_capture_channels) {
+  return num_capture_channels > kMaxNumChannelsOnStack ? num_capture_channels
+                                                       : 0;
+}
+
+void LinearEchoPower(const FftData& E,
+                     const FftData& Y,
+                     std::array<float, kFftLengthBy2Plus1>* S2) {
+  for (size_t k = 0; k < E.re.size(); ++k) {
+    (*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) +
+               (Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]);
+  }
+}
+
+// Fades between two input signals using a fixed-size transition.
+void SignalTransition(rtc::ArrayView<const float> from,
+                      rtc::ArrayView<const float> to,
+                      rtc::ArrayView<float> out) {
+  if (from == to) {
+    RTC_DCHECK_EQ(to.size(), out.size());
+    std::copy(to.begin(), to.end(), out.begin());
+  } else {
+    constexpr size_t kTransitionSize = 30;
+    constexpr float kOneByTransitionSizePlusOne = 1.f / (kTransitionSize + 1);
+
+    RTC_DCHECK_EQ(from.size(), to.size());
+    RTC_DCHECK_EQ(from.size(), out.size());
+    RTC_DCHECK_LE(kTransitionSize, out.size());
+
+    for (size_t k = 0; k < kTransitionSize; ++k) {
+      float a = (k + 1) * kOneByTransitionSizePlusOne;
+      out[k] = a * to[k] + (1.f - a) * from[k];
+    }
+
+    std::copy(to.begin() + kTransitionSize, to.end(),
+              out.begin() + kTransitionSize);
+  }
+}
+
+// Computes a windowed (square root Hanning) padded FFT and updates the related
+// memory.
+void WindowedPaddedFft(const Aec3Fft& fft,
+                       rtc::ArrayView<const float> v,
+                       rtc::ArrayView<float> v_old,
+                       FftData* V) {
+  fft.PaddedFft(v, v_old, Aec3Fft::Window::kSqrtHanning, V);
+  std::copy(v.begin(), v.end(), v_old.begin());
+}
+
+// Class for removing the echo from the capture signal.
+class EchoRemoverImpl final : public EchoRemover {
+ public:
+  EchoRemoverImpl(const EchoCanceller3Config& config,
+                  int sample_rate_hz,
+                  size_t num_render_channels,
+                  size_t num_capture_channels);
+  ~EchoRemoverImpl() override;
+  EchoRemoverImpl(const EchoRemoverImpl&) = delete;
+  EchoRemoverImpl& operator=(const EchoRemoverImpl&) = delete;
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override;
+
+  // Removes the echo from a block of samples from the capture signal. The
+  // supplied render signal is assumed to be pre-aligned with the capture
+  // signal.
+  void ProcessCapture(EchoPathVariability echo_path_variability,
+                      bool capture_signal_saturation,
+                      const absl::optional<DelayEstimate>& external_delay,
+                      RenderBuffer* render_buffer,
+                      Block* linear_output,
+                      Block* capture) override;
+
+  // Updates the status on whether echo leakage is detected in the output of
+  // the echo remover.
+  void UpdateEchoLeakageStatus(bool leakage_detected) override {
+    echo_leakage_detected_ = leakage_detected;
+  }
+
+  void SetCaptureOutputUsage(bool capture_output_used) override {
+    capture_output_used_ = capture_output_used;
+  }
+
+ private:
+  // Selects whichever of the coarse and refined linear filter outputs is most
+  // appropriate to pass to the suppressor, and forms the linear filter output
+  // by smoothly transitioning between them.
+  void FormLinearFilterOutput(const SubtractorOutput& subtractor_output,
+                              rtc::ArrayView<float> output);
+
+  static std::atomic<int> instance_count_;
+  const EchoCanceller3Config config_;
+  const Aec3Fft fft_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const Aec3Optimization optimization_;
+  const int sample_rate_hz_;
+  const size_t num_render_channels_;
+  const size_t num_capture_channels_;
+  const bool use_coarse_filter_output_;
+  Subtractor subtractor_;
+  SuppressionGain suppression_gain_;
+  ComfortNoiseGenerator cng_;
+  SuppressionFilter suppression_filter_;
+  RenderSignalAnalyzer render_signal_analyzer_;
+  ResidualEchoEstimator residual_echo_estimator_;
+  bool echo_leakage_detected_ = false;
+  bool capture_output_used_ = true;
+  AecState aec_state_;
+  EchoRemoverMetrics metrics_;
+  std::vector<std::array<float, kBlockSize>> e_old_;
+  std::vector<std::array<float, kBlockSize>> y_old_;
+  size_t block_counter_ = 0;
+  int gain_change_hangover_ = 0;
+  bool refined_filter_output_last_selected_ = true;
+
+  std::vector<std::array<float, kBlockSize>> e_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded_heap_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_;
+  std::vector<FftData> Y_heap_;
+  std::vector<FftData> E_heap_;
+  std::vector<FftData> comfort_noise_heap_;
+  std::vector<FftData> high_band_comfort_noise_heap_;
+  std::vector<SubtractorOutput> subtractor_output_heap_;
+};
+
+std::atomic<int> EchoRemoverImpl::instance_count_(0);
+
+EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
+                                 int sample_rate_hz,
+                                 size_t num_render_channels,
+                                 size_t num_capture_channels)
+    : config_(config),
+      fft_(),
+      data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      optimization_(DetectOptimization()),
+      sample_rate_hz_(sample_rate_hz),
+      num_render_channels_(num_render_channels),
+      num_capture_channels_(num_capture_channels),
+      use_coarse_filter_output_(
+          config_.filter.enable_coarse_filter_output_usage),
+      subtractor_(config,
+                  num_render_channels_,
+                  num_capture_channels_,
+                  data_dumper_.get(),
+                  optimization_),
+      suppression_gain_(config_,
+                        optimization_,
+                        sample_rate_hz,
+                        num_capture_channels),
+      cng_(config_, optimization_, num_capture_channels_),
+      suppression_filter_(optimization_,
+                          sample_rate_hz_,
+                          num_capture_channels_),
+      render_signal_analyzer_(config_),
+      residual_echo_estimator_(config_, num_render_channels),
+      aec_state_(config_, num_capture_channels_),
+      e_old_(num_capture_channels_, {0.f}),
+      y_old_(num_capture_channels_, {0.f}),
+      e_heap_(NumChannelsOnHeap(num_capture_channels_), {0.f}),
+      Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      R2_unbounded_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      Y_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      E_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      high_band_comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      subtractor_output_heap_(NumChannelsOnHeap(num_capture_channels_)) {
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
+}
+
+EchoRemoverImpl::~EchoRemoverImpl() = default;
+
+void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const {
+  // Echo return loss (ERL) is inverted to go from gain to attenuation.
+  metrics->echo_return_loss = -10.0 * std::log10(aec_state_.ErlTimeDomain());
+  metrics->echo_return_loss_enhancement =
+      Log2TodB(aec_state_.FullBandErleLog2());
+}
+
+void EchoRemoverImpl::ProcessCapture(
+    EchoPathVariability echo_path_variability,
+    bool capture_signal_saturation,
+    const absl::optional<DelayEstimate>& external_delay,
+    RenderBuffer* render_buffer,
+    Block* linear_output,
+    Block* capture) {
+  ++block_counter_;
+  const Block& x = render_buffer->GetBlock(0);
+  Block* y = capture;
+  RTC_DCHECK(render_buffer);
+  RTC_DCHECK(y);
+  RTC_DCHECK_EQ(x.NumBands(), NumBandsForRate(sample_rate_hz_));
+  RTC_DCHECK_EQ(y->NumBands(), NumBandsForRate(sample_rate_hz_));
+  RTC_DCHECK_EQ(x.NumChannels(), num_render_channels_);
+  RTC_DCHECK_EQ(y->NumChannels(), num_capture_channels_);
+
+  // Stack allocated data to use when the number of channels is low.
+  std::array<std::array<float, kBlockSize>, kMaxNumChannelsOnStack> e_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      Y2_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      E2_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      R2_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      R2_unbounded_stack;
+  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
+      S2_linear_stack;
+  std::array<FftData, kMaxNumChannelsOnStack> Y_stack;
+  std::array<FftData, kMaxNumChannelsOnStack> E_stack;
+  std::array<FftData, kMaxNumChannelsOnStack> comfort_noise_stack;
+  std::array<FftData, kMaxNumChannelsOnStack> high_band_comfort_noise_stack;
+  std::array<SubtractorOutput, kMaxNumChannelsOnStack> subtractor_output_stack;
+
+  rtc::ArrayView<std::array<float, kBlockSize>> e(e_stack.data(),
+                                                  num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2(
+      Y2_stack.data(), num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2(
+      E2_stack.data(), num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2(
+      R2_stack.data(), num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(
+      R2_unbounded_stack.data(), num_capture_channels_);
+  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear(
+      S2_linear_stack.data(), num_capture_channels_);
+  rtc::ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_);
+  rtc::ArrayView<FftData> E(E_stack.data(), num_capture_channels_);
+  rtc::ArrayView<FftData> comfort_noise(comfort_noise_stack.data(),
+                                        num_capture_channels_);
+  rtc::ArrayView<FftData> high_band_comfort_noise(
+      high_band_comfort_noise_stack.data(), num_capture_channels_);
+  rtc::ArrayView<SubtractorOutput> subtractor_output(
+      subtractor_output_stack.data(), num_capture_channels_);
+  if (NumChannelsOnHeap(num_capture_channels_) > 0) {
+    // If the stack-allocated space is too small, use the heap for storing the
+    // microphone data.
+    e = rtc::ArrayView<std::array<float, kBlockSize>>(e_heap_.data(),
+                                                      num_capture_channels_);
+    Y2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
+        Y2_heap_.data(), num_capture_channels_);
+    E2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
+        E2_heap_.data(), num_capture_channels_);
+    R2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
+        R2_heap_.data(), num_capture_channels_);
+    R2_unbounded = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
+        R2_unbounded_heap_.data(), num_capture_channels_);
+    S2_linear = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
+        S2_linear_heap_.data(), num_capture_channels_);
+    Y = rtc::ArrayView<FftData>(Y_heap_.data(), num_capture_channels_);
+    E = rtc::ArrayView<FftData>(E_heap_.data(), num_capture_channels_);
+    comfort_noise = rtc::ArrayView<FftData>(comfort_noise_heap_.data(),
+                                            num_capture_channels_);
+    high_band_comfort_noise = rtc::ArrayView<FftData>(
+        high_band_comfort_noise_heap_.data(), num_capture_channels_);
+    subtractor_output = rtc::ArrayView<SubtractorOutput>(
+        subtractor_output_heap_.data(), num_capture_channels_);
+  }
+
+  data_dumper_->DumpWav("aec3_echo_remover_capture_input",
+                        y->View(/*band=*/0, /*channel=*/0), 16000, 1);
+  data_dumper_->DumpWav("aec3_echo_remover_render_input",
+                        x.View(/*band=*/0, /*channel=*/0), 16000, 1);
+  data_dumper_->DumpRaw("aec3_echo_remover_capture_input",
+                        y->View(/*band=*/0, /*channel=*/0));
+  data_dumper_->DumpRaw("aec3_echo_remover_render_input",
+                        x.View(/*band=*/0, /*channel=*/0));
+
+  aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
+
+  if (echo_path_variability.AudioPathChanged()) {
+    // Ensure that the gain change is only acted on once per frame.
+    if (echo_path_variability.gain_change) {
+      if (gain_change_hangover_ == 0) {
+        constexpr int kMaxBlocksPerFrame = 3;
+        gain_change_hangover_ = kMaxBlocksPerFrame;
+        rtc::LoggingSeverity log_level =
+            config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING
+                                                       : rtc::LS_VERBOSE;
+        RTC_LOG_V(log_level)
+            << "Gain change detected at block " << block_counter_;
+      } else {
+        echo_path_variability.gain_change = false;
+      }
+    }
+
+    subtractor_.HandleEchoPathChange(echo_path_variability);
+    aec_state_.HandleEchoPathChange(echo_path_variability);
+
+    if (echo_path_variability.delay_change !=
+        EchoPathVariability::DelayAdjustment::kNone) {
+      suppression_gain_.SetInitialState(true);
+    }
+  }
+  if (gain_change_hangover_ > 0) {
+    --gain_change_hangover_;
+  }
+
+  // Analyze the render signal.
+  render_signal_analyzer_.Update(*render_buffer,
+                                 aec_state_.MinDirectPathFilterDelay());
+
+  // State transition.
+  if (aec_state_.TransitionTriggered()) {
+    subtractor_.ExitInitialState();
+    suppression_gain_.SetInitialState(false);
+  }
+
+  // Perform linear echo cancellation.
+  subtractor_.Process(*render_buffer, *y, render_signal_analyzer_, aec_state_,
+                      subtractor_output);
+
+  // Compute spectra.
+  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+    FormLinearFilterOutput(subtractor_output[ch], e[ch]);
+    WindowedPaddedFft(fft_, y->View(/*band=*/0, ch), y_old_[ch], &Y[ch]);
+    WindowedPaddedFft(fft_, e[ch], e_old_[ch], &E[ch]);
+    LinearEchoPower(E[ch], Y[ch], &S2_linear[ch]);
+    Y[ch].Spectrum(optimization_, Y2[ch]);
+    E[ch].Spectrum(optimization_, E2[ch]);
+  }
+
+  // Optionally return the linear filter output.
+  if (linear_output) {
+    RTC_DCHECK_GE(1, linear_output->NumBands());
+    RTC_DCHECK_EQ(num_capture_channels_, linear_output->NumChannels());
+    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+      std::copy(e[ch].begin(), e[ch].end(),
+                linear_output->begin(/*band=*/0, ch));
+    }
+  }
+
+  // Update the AEC state information.
+  aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponses(),
+                    subtractor_.FilterImpulseResponses(), *render_buffer, E2,
+                    Y2, subtractor_output);
+
+  // Choose the linear output.
+  const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y;
+
+  data_dumper_->DumpWav("aec3_output_linear",
+                        y->View(/*band=*/0, /*channel=*/0), 16000, 1);
+  data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1);
+
+  // Estimate the comfort noise.
+  cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise,
+               high_band_comfort_noise);
+
+  // Only do the below processing if the output of the audio processing module
+  // is used.
+  std::array<float, kFftLengthBy2Plus1> G;
+  if (capture_output_used_) {
+    // Estimate the residual echo power.
+    residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
+                                      suppression_gain_.IsDominantNearend(), R2,
+                                      R2_unbounded);
+
+    // Suppressor nearend estimate.
+    if (aec_state_.UsableLinearEstimate()) {
+      // E2 is bound by Y2.
+      for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+        std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(),
+                       E2[ch].begin(),
+                       [](float a, float b) { return std::min(a, b); });
+      }
+    }
+    const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2;
+
+    // Suppressor echo estimate.
+    const auto& echo_spectrum =
+        aec_state_.UsableLinearEstimate() ? S2_linear : R2;
+
+    // Determine if the suppressor should assume clock drift.
+    const bool clock_drift = config_.echo_removal_control.has_clock_drift ||
+                             echo_path_variability.clock_drift;
+
+    // Compute preferred gains.
+    float high_bands_gain;
+    suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2, R2_unbounded,
+                              cng_.NoiseSpectrum(), render_signal_analyzer_,
+                              aec_state_, x, clock_drift, &high_bands_gain, &G);
+
+    suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
+                                  high_bands_gain, Y_fft, y);
+
+  } else {
+    G.fill(0.f);
+  }
+
+  // Update the metrics.
+  metrics_.Update(aec_state_, cng_.NoiseSpectrum()[0], G);
+
+  // Debug outputs for the purpose of development and analysis.
+  data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
+                        &subtractor_output[0].s_refined[0], 16000, 1);
+  data_dumper_->DumpRaw("aec3_output", y->View(/*band=*/0, /*channel=*/0));
+  data_dumper_->DumpRaw("aec3_narrow_render",
+                        render_signal_analyzer_.NarrowPeakBand() ? 1 : 0);
+  data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()[0]);
+  data_dumper_->DumpRaw("aec3_suppressor_gain", G);
+  data_dumper_->DumpWav("aec3_output", y->View(/*band=*/0, /*channel=*/0),
+                        16000, 1);
+  data_dumper_->DumpRaw("aec3_using_subtractor_output[0]",
+                        aec_state_.UseLinearFilterOutput() ? 1 : 0);
+  data_dumper_->DumpRaw("aec3_E2", E2[0]);
+  data_dumper_->DumpRaw("aec3_S2_linear", S2_linear[0]);
+  data_dumper_->DumpRaw("aec3_Y2", Y2[0]);
+  data_dumper_->DumpRaw(
+      "aec3_X2", render_buffer->Spectrum(
+                     aec_state_.MinDirectPathFilterDelay())[/*channel=*/0]);
+  data_dumper_->DumpRaw("aec3_R2", R2[0]);
+  data_dumper_->DumpRaw("aec3_filter_delay",
+                        aec_state_.MinDirectPathFilterDelay());
+  data_dumper_->DumpRaw("aec3_capture_saturation",
+                        aec_state_.SaturatedCapture() ? 1 : 0);
+}
+
+void EchoRemoverImpl::FormLinearFilterOutput(
+    const SubtractorOutput& subtractor_output,
+    rtc::ArrayView<float> output) {
+  RTC_DCHECK_EQ(subtractor_output.e_refined.size(), output.size());
+  RTC_DCHECK_EQ(subtractor_output.e_coarse.size(), output.size());
+  bool use_refined_output = true;
+  if (use_coarse_filter_output_) {
+    // As the output of the refined adaptive filter generally should be better
+    // than the coarse filter output, apply a margin and thresholds when
+    // choosing the coarse filter output.
+    if (subtractor_output.e2_coarse < 0.9f * subtractor_output.e2_refined &&
+        subtractor_output.y2 > 30.f * 30.f * kBlockSize &&
+        (subtractor_output.s2_refined > 60.f * 60.f * kBlockSize ||
+         subtractor_output.s2_coarse > 60.f * 60.f * kBlockSize)) {
+      use_refined_output = false;
+    } else {
+      // If the refined filter has diverged, choose the filter output that has
+      // the lowest power.
+      if (subtractor_output.e2_coarse < subtractor_output.e2_refined &&
+          subtractor_output.y2 < subtractor_output.e2_refined) {
+        use_refined_output = false;
+      }
+    }
+  }
+
+  SignalTransition(refined_filter_output_last_selected_
+                       ? subtractor_output.e_refined
+                       : subtractor_output.e_coarse,
+                   use_refined_output ? subtractor_output.e_refined
+                                      : subtractor_output.e_coarse,
+                   output);
+  refined_filter_output_last_selected_ = use_refined_output;
+}
+
+}  // namespace
+
+EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config,
+                                 int sample_rate_hz,
+                                 size_t num_render_channels,
+                                 size_t num_capture_channels) {
+  return new EchoRemoverImpl(config, sample_rate_hz, num_render_channels,
+                             num_capture_channels);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.h
new file mode 100644
index 0000000000..f2f4f5e64d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
+
+#include <stddef.h>
+
+#include "absl/types/optional.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "api/audio/echo_control.h"
+#include "modules/audio_processing/aec3/block.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+
+namespace webrtc {
+
+// Class for removing the echo from the capture signal.
+class EchoRemover {
+ public:
+  static EchoRemover* Create(const EchoCanceller3Config& config,
+                             int sample_rate_hz,
+                             size_t num_render_channels,
+                             size_t num_capture_channels);
+  virtual ~EchoRemover() = default;
+
+  // Get current metrics.
+  virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0;
+
+  // Removes the echo from a block of samples from the capture signal. The
+  // supplied render signal is assumed to be pre-aligned with the capture
+  // signal.
+ virtual void ProcessCapture( + EchoPathVariability echo_path_variability, + bool capture_signal_saturation, + const absl::optional& external_delay, + RenderBuffer* render_buffer, + Block* linear_output, + Block* capture) = 0; + + // Updates the status on whether echo leakage is detected in the output of the + // echo remover. + virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0; + + // Specifies whether the capture output will be used. The purpose of this is + // to allow the echo remover to deactivate some of the processing when the + // resulting output is anyway not used, for instance when the endpoint is + // muted. + virtual void SetCaptureOutputUsage(bool capture_output_used) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc new file mode 100644 index 0000000000..c3fc80773a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.cc @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_remover_metrics.h" + +#include +#include + +#include +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +EchoRemoverMetrics::DbMetric::DbMetric() : DbMetric(0.f, 0.f, 0.f) {} +EchoRemoverMetrics::DbMetric::DbMetric(float sum_value, + float floor_value, + float ceil_value) + : sum_value(sum_value), floor_value(floor_value), ceil_value(ceil_value) {} + +void EchoRemoverMetrics::DbMetric::Update(float value) { + sum_value += value; + floor_value = std::min(floor_value, value); + ceil_value = std::max(ceil_value, value); +} + +void EchoRemoverMetrics::DbMetric::UpdateInstant(float value) { + sum_value = value; + floor_value = std::min(floor_value, value); + ceil_value = std::max(ceil_value, value); +} + +EchoRemoverMetrics::EchoRemoverMetrics() { + ResetMetrics(); +} + +void EchoRemoverMetrics::ResetMetrics() { + erl_time_domain_ = DbMetric(0.f, 10000.f, 0.000f); + erle_time_domain_ = DbMetric(0.f, 0.f, 1000.f); + saturated_capture_ = false; +} + +void EchoRemoverMetrics::Update( + const AecState& aec_state, + const std::array& comfort_noise_spectrum, + const std::array& suppressor_gain) { + metrics_reported_ = false; + if (++block_counter_ <= kMetricsCollectionBlocks) { + erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain()); + erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2()); + saturated_capture_ = saturated_capture_ || aec_state.SaturatedCapture(); + } else { + // Report the metrics over several frames in order to lower the impact of + // the logarithms involved on the computational complexity. + switch (block_counter_) { + case kMetricsCollectionBlocks + 1: + RTC_HISTOGRAM_BOOLEAN( + "WebRTC.Audio.EchoCanceller.UsableLinearEstimate", + static_cast(aec_state.UsableLinearEstimate() ? 
1 : 0)); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay", + aec_state.MinDirectPathFilterDelay(), 0, 30, + 31); + RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation", + static_cast(saturated_capture_ ? 1 : 0)); + break; + case kMetricsCollectionBlocks + 2: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erl.Value", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_time_domain_.sum_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erl.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_time_domain_.ceil_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erl.Min", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_time_domain_.floor_value), + 0, 59, 30); + break; + case kMetricsCollectionBlocks + 3: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erle.Value", + aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f, + erle_time_domain_.sum_value), + 0, 19, 20); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erle.Max", + aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f, + erle_time_domain_.ceil_value), + 0, 19, 20); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.Erle.Min", + aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f, + erle_time_domain_.floor_value), + 0, 19, 20); + metrics_reported_ = true; + RTC_DCHECK_EQ(kMetricsReportingIntervalBlocks, block_counter_); + block_counter_ = 0; + ResetMetrics(); + break; + default: + RTC_DCHECK_NOTREACHED(); + break; + } + } +} + +namespace aec3 { + +void UpdateDbMetric(const std::array& value, + std::array* statistic) { + RTC_DCHECK(statistic); + // Truncation is intended in the band width computation. + constexpr int kNumBands = 2; + constexpr int kBandWidth = 65 / kNumBands; + constexpr float kOneByBandWidth = 1.f / kBandWidth; + RTC_DCHECK_EQ(kNumBands, statistic->size()); + RTC_DCHECK_EQ(65, value.size()); + for (size_t k = 0; k < statistic->size(); ++k) { + float average_band = + std::accumulate(value.begin() + kBandWidth * k, + value.begin() + kBandWidth * (k + 1), 0.f) * + kOneByBandWidth; + (*statistic)[k].Update(average_band); + } +} + +int TransformDbMetricForReporting(bool negate, + float min_value, + float max_value, + float offset, + float scaling, + float value) { + float new_value = 10.f * std::log10(value * scaling + 1e-10f) + offset; + if (negate) { + new_value = -new_value; + } + return static_cast(rtc::SafeClamp(new_value, min_value, max_value)); +} + +} // namespace aec3 + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.h b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.h new file mode 100644 index 0000000000..aec8084d78 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_ + +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" + +namespace webrtc { + +// Handles the reporting of metrics for the echo remover. +class EchoRemoverMetrics { + public: + struct DbMetric { + DbMetric(); + DbMetric(float sum_value, float floor_value, float ceil_value); + void Update(float value); + void UpdateInstant(float value); + float sum_value; + float floor_value; + float ceil_value; + }; + + EchoRemoverMetrics(); + + EchoRemoverMetrics(const EchoRemoverMetrics&) = delete; + EchoRemoverMetrics& operator=(const EchoRemoverMetrics&) = delete; + + // Updates the metric with new data. + void Update( + const AecState& aec_state, + const std::array& comfort_noise_spectrum, + const std::array& suppressor_gain); + + // Returns true if the metrics have just been reported, otherwise false. + bool MetricsReported() { return metrics_reported_; } + + private: + // Resets the metrics. + void ResetMetrics(); + + int block_counter_ = 0; + DbMetric erl_time_domain_; + DbMetric erle_time_domain_; + bool saturated_capture_ = false; + bool metrics_reported_ = false; +}; + +namespace aec3 { + +// Updates a banded metric of type DbMetric with the values in the supplied +// array. +void UpdateDbMetric(const std::array& value, + std::array* statistic); + +// Transforms a DbMetric from the linear domain into the logarithmic domain. +int TransformDbMetricForReporting(bool negate, + float min_value, + float max_value, + float offset, + float scaling, + float value); + +} // namespace aec3 + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc new file mode 100644 index 0000000000..45b30a9c74 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_remover_metrics.h" + +#include + +#include + +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "test/gtest.h" + +namespace webrtc { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for non-null input. +TEST(UpdateDbMetricDeathTest, NullValue) { + std::array value; + value.fill(0.f); + EXPECT_DEATH(aec3::UpdateDbMetric(value, nullptr), ""); +} + +#endif + +// Verifies the updating functionality of UpdateDbMetric. 
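+// As a quick orientation for the test below (editor's sketch, not part of the
+// upstream file): UpdateDbMetric splits the 65-bin spectrum into two bands of
+// width 65 / 2 == 32, so bins [0, 32) feed statistic[0], bins [32, 64) feed
+// statistic[1], and the top bin 64 is dropped by the intentional truncation.
+// Each band average is then accumulated via DbMetric::Update:
+//
+//   std::array<float, kFftLengthBy2Plus1> value;
+//   value.fill(4.f);
+//   std::array<EchoRemoverMetrics::DbMetric, 2> statistic;
+//   aec3::UpdateDbMetric(value, &statistic);
+//   // Both band averages equal 4.f, so each sum_value becomes 4.f.
+//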
+TEST(UpdateDbMetric, Updating) {
+  std::array<float, kFftLengthBy2Plus1> value;
+  std::array<EchoRemoverMetrics::DbMetric, 2> statistic;
+  statistic.fill(EchoRemoverMetrics::DbMetric(0.f, 100.f, -100.f));
+  constexpr float kValue0 = 10.f;
+  constexpr float kValue1 = 20.f;
+  std::fill(value.begin(), value.begin() + 32, kValue0);
+  std::fill(value.begin() + 32, value.begin() + 64, kValue1);
+
+  aec3::UpdateDbMetric(value, &statistic);
+  EXPECT_FLOAT_EQ(kValue0, statistic[0].sum_value);
+  EXPECT_FLOAT_EQ(kValue0, statistic[0].ceil_value);
+  EXPECT_FLOAT_EQ(kValue0, statistic[0].floor_value);
+  EXPECT_FLOAT_EQ(kValue1, statistic[1].sum_value);
+  EXPECT_FLOAT_EQ(kValue1, statistic[1].ceil_value);
+  EXPECT_FLOAT_EQ(kValue1, statistic[1].floor_value);
+
+  aec3::UpdateDbMetric(value, &statistic);
+  EXPECT_FLOAT_EQ(2.f * kValue0, statistic[0].sum_value);
+  EXPECT_FLOAT_EQ(kValue0, statistic[0].ceil_value);
+  EXPECT_FLOAT_EQ(kValue0, statistic[0].floor_value);
+  EXPECT_FLOAT_EQ(2.f * kValue1, statistic[1].sum_value);
+  EXPECT_FLOAT_EQ(kValue1, statistic[1].ceil_value);
+  EXPECT_FLOAT_EQ(kValue1, statistic[1].floor_value);
+}
+
+// Verifies that the TransformDbMetricForReporting method produces the desired
+// output for values for dBFS.
+TEST(TransformDbMetricForReporting, DbFsScaling) {
+  std::array<float, kBlockSize> x;
+  FftData X;
+  std::array<float, kFftLengthBy2Plus1> X2;
+  Aec3Fft fft;
+  x.fill(1000.f);
+  fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X);
+  X.Spectrum(Aec3Optimization::kNone, X2);
+
+  float offset = -10.f * std::log10(32768.f * 32768.f);
+  EXPECT_NEAR(offset, -90.3f, 0.1f);
+  EXPECT_EQ(
+      static_cast<int>(30.3f),
+      aec3::TransformDbMetricForReporting(
+          true, 0.f, 90.f, offset, 1.f / (kBlockSize * kBlockSize), X2[0]));
+}
+
+// Verifies that the TransformDbMetricForReporting method is able to properly
+// limit the output.
+TEST(TransformDbMetricForReporting, Limits) {
+  EXPECT_EQ(0, aec3::TransformDbMetricForReporting(false, 0.f, 10.f, 0.f, 1.f,
+                                                   0.001f));
+  EXPECT_EQ(10, aec3::TransformDbMetricForReporting(false, 0.f, 10.f, 0.f, 1.f,
+                                                    100.f));
+}
+
+// Verifies that the TransformDbMetricForReporting method is able to properly
+// negate output.
+TEST(TransformDbMetricForReporting, Negate) {
+  EXPECT_EQ(10, aec3::TransformDbMetricForReporting(true, -20.f, 20.f, 0.f, 1.f,
+                                                    0.1f));
+  EXPECT_EQ(-10, aec3::TransformDbMetricForReporting(true, -20.f, 20.f, 0.f,
+                                                     1.f, 10.f));
+}
+
+// Verify the Update functionality of DbMetric.
+TEST(DbMetric, Update) {
+  EchoRemoverMetrics::DbMetric metric(0.f, 20.f, -20.f);
+  constexpr int kNumValues = 100;
+  constexpr float kValue = 10.f;
+  for (int k = 0; k < kNumValues; ++k) {
+    metric.Update(kValue);
+  }
+  EXPECT_FLOAT_EQ(kValue * kNumValues, metric.sum_value);
+  EXPECT_FLOAT_EQ(kValue, metric.ceil_value);
+  EXPECT_FLOAT_EQ(kValue, metric.floor_value);
+}
+
+// Verify the UpdateInstant functionality of DbMetric.
+TEST(DbMetric, UpdateInstant) {
+  EchoRemoverMetrics::DbMetric metric(0.f, 20.f, -20.f);
+  constexpr float kMinValue = -77.f;
+  constexpr float kMaxValue = 33.f;
+  constexpr float kLastValue = (kMinValue + kMaxValue) / 2.0f;
+  for (float value = kMinValue; value <= kMaxValue; value++)
+    metric.UpdateInstant(value);
+  metric.UpdateInstant(kLastValue);
+  EXPECT_FLOAT_EQ(kLastValue, metric.sum_value);
+  EXPECT_FLOAT_EQ(kMaxValue, metric.ceil_value);
+  EXPECT_FLOAT_EQ(kMinValue, metric.floor_value);
+}
+
+// Verify the constructor functionality of DbMetric.
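+// DbMetric carries a running sum together with floor and ceiling trackers
+// (editor's recap of the struct in echo_remover_metrics.h): the default
+// constructor delegates to DbMetric(0.f, 0.f, 0.f), Update() accumulates into
+// sum_value, and UpdateInstant() overwrites sum_value while still widening
+// the floor/ceiling. A minimal sketch:
+//
+//   EchoRemoverMetrics::DbMetric m(0.f, 100.f, -100.f);
+//   m.Update(5.f);         // sum_value == 5.f
+//   m.Update(5.f);         // sum_value == 10.f
+//   m.UpdateInstant(2.f);  // sum_value == 2.f, floor_value == 2.f
+//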
+TEST(DbMetric, Constructor) { + EchoRemoverMetrics::DbMetric metric; + EXPECT_FLOAT_EQ(0.f, metric.sum_value); + EXPECT_FLOAT_EQ(0.f, metric.ceil_value); + EXPECT_FLOAT_EQ(0.f, metric.floor_value); + + metric = EchoRemoverMetrics::DbMetric(1.f, 2.f, 3.f); + EXPECT_FLOAT_EQ(1.f, metric.sum_value); + EXPECT_FLOAT_EQ(2.f, metric.floor_value); + EXPECT_FLOAT_EQ(3.f, metric.ceil_value); +} + +// Verify the general functionality of EchoRemoverMetrics. +TEST(EchoRemoverMetrics, NormalUsage) { + EchoRemoverMetrics metrics; + AecState aec_state(EchoCanceller3Config{}, 1); + std::array comfort_noise_spectrum; + std::array suppressor_gain; + comfort_noise_spectrum.fill(10.f); + suppressor_gain.fill(1.f); + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < kMetricsReportingIntervalBlocks - 1; ++k) { + metrics.Update(aec_state, comfort_noise_spectrum, suppressor_gain); + EXPECT_FALSE(metrics.MetricsReported()); + } + metrics.Update(aec_state, comfort_noise_spectrum, suppressor_gain); + EXPECT_TRUE(metrics.MetricsReported()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_unittest.cc new file mode 100644 index 0000000000..66168ab08d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_remover_unittest.cc @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/echo_remover.h"
+
+#include
+#include
+#include
+#include
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+std::string ProduceDebugText(int sample_rate_hz) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz;
+  return ss.Release();
+}
+
+std::string ProduceDebugText(int sample_rate_hz, int delay) {
+  rtc::StringBuilder ss(ProduceDebugText(sample_rate_hz));
+  ss << ", Delay: " << delay;
+  return ss.Release();
+}
+
+}  // namespace
+
+class EchoRemoverMultiChannel
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {};
+
+INSTANTIATE_TEST_SUITE_P(MultiChannel,
+                         EchoRemoverMultiChannel,
+                         ::testing::Combine(::testing::Values(1, 2, 8),
+                                            ::testing::Values(1, 2, 8)));
+
+// Verifies the basic API call sequence.
+TEST_P(EchoRemoverMultiChannel, BasicApiCalls) {
+  const size_t num_render_channels = std::get<0>(GetParam());
+  const size_t num_capture_channels = std::get<1>(GetParam());
+  absl::optional<DelayEstimate> delay_estimate;
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    std::unique_ptr<EchoRemover> remover(
+        EchoRemover::Create(EchoCanceller3Config(), rate, num_render_channels,
+                            num_capture_channels));
+    std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create(
+        EchoCanceller3Config(), rate, num_render_channels));
+
+    Block render(NumBandsForRate(rate), num_render_channels);
+    Block capture(NumBandsForRate(rate), num_capture_channels);
+    for (size_t k = 0; k < 100; ++k) {
+      EchoPathVariability echo_path_variability(
+          k % 3 == 0 ? true : false,
+          k % 5 == 0 ? EchoPathVariability::DelayAdjustment::kNewDetectedDelay
+                     : EchoPathVariability::DelayAdjustment::kNone,
+          false);
+      render_buffer->Insert(render);
+      render_buffer->PrepareCaptureProcessing();
+
+      remover->ProcessCapture(echo_path_variability, k % 2 == 0 ? true : false,
+                              delay_estimate, render_buffer->GetRenderBuffer(),
+                              nullptr, &capture);
+    }
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for the sample rate.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(EchoRemoverDeathTest, DISABLED_WrongSampleRate) {
+  EXPECT_DEATH(std::unique_ptr<EchoRemover>(
+                   EchoRemover::Create(EchoCanceller3Config(), 8001, 1, 1)),
+               "");
+}
+
+// Verifies the check for the number of capture bands.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(EchoRemoverDeathTest, DISABLED_WrongCaptureNumBands) {
+  absl::optional<DelayEstimate> delay_estimate;
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    std::unique_ptr<EchoRemover> remover(
+        EchoRemover::Create(EchoCanceller3Config(), rate, 1, 1));
+    std::unique_ptr<RenderDelayBuffer> render_buffer(
+        RenderDelayBuffer::Create(EchoCanceller3Config(), rate, 1));
+    Block capture(NumBandsForRate(rate == 48000 ?
16000 : rate + 16000), 1); + EchoPathVariability echo_path_variability( + false, EchoPathVariability::DelayAdjustment::kNone, false); + EXPECT_DEATH(remover->ProcessCapture( + echo_path_variability, false, delay_estimate, + render_buffer->GetRenderBuffer(), nullptr, &capture), + ""); + } +} + +// Verifies the check for non-null capture block. +TEST(EchoRemoverDeathTest, NullCapture) { + absl::optional delay_estimate; + std::unique_ptr remover( + EchoRemover::Create(EchoCanceller3Config(), 16000, 1, 1)); + std::unique_ptr render_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), 16000, 1)); + EchoPathVariability echo_path_variability( + false, EchoPathVariability::DelayAdjustment::kNone, false); + EXPECT_DEATH(remover->ProcessCapture( + echo_path_variability, false, delay_estimate, + render_buffer->GetRenderBuffer(), nullptr, nullptr), + ""); +} + +#endif + +// Performs a sanity check that the echo_remover is able to properly +// remove echoes. +TEST(EchoRemover, BasicEchoRemoval) { + constexpr int kNumBlocksToProcess = 500; + Random random_generator(42U); + absl::optional delay_estimate; + for (size_t num_channels : {1, 2, 4}) { + for (auto rate : {16000, 32000, 48000}) { + Block x(NumBandsForRate(rate), num_channels); + Block y(NumBandsForRate(rate), num_channels); + EchoPathVariability echo_path_variability( + false, EchoPathVariability::DelayAdjustment::kNone, false); + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(rate, delay_samples)); + EchoCanceller3Config config; + std::unique_ptr remover( + EchoRemover::Create(config, rate, num_channels, num_channels)); + std::unique_ptr render_buffer( + RenderDelayBuffer::Create(config, rate, num_channels)); + render_buffer->AlignFromDelay(delay_samples / kBlockSize); + + std::vector>>> + delay_buffers(x.NumBands()); + for (size_t band = 0; band < delay_buffers.size(); ++band) { + delay_buffers[band].resize(x.NumChannels()); + } + + for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + delay_buffers[band][channel].reset( + new DelayBuffer(delay_samples)); + } + } + + float input_energy = 0.f; + float output_energy = 0.f; + for (int k = 0; k < kNumBlocksToProcess; ++k) { + const bool silence = k < 100 || (k % 100 >= 10); + + for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + if (silence) { + std::fill(x.begin(band, channel), x.end(band, channel), 0.f); + } else { + RandomizeSampleVector(&random_generator, x.View(band, channel)); + } + delay_buffers[band][channel]->Delay(x.View(band, channel), + y.View(band, channel)); + } + } + + if (k > kNumBlocksToProcess / 2) { + input_energy = std::inner_product( + y.begin(/*band=*/0, /*channel=*/0), + y.end(/*band=*/0, /*channel=*/0), + y.begin(/*band=*/0, /*channel=*/0), input_energy); + } + + render_buffer->Insert(x); + render_buffer->PrepareCaptureProcessing(); + + remover->ProcessCapture(echo_path_variability, false, delay_estimate, + render_buffer->GetRenderBuffer(), nullptr, + &y); + + if (k > kNumBlocksToProcess / 2) { + output_energy = std::inner_product( + y.begin(/*band=*/0, /*channel=*/0), + y.end(/*band=*/0, /*channel=*/0), + y.begin(/*band=*/0, /*channel=*/0), output_energy); + } + } + EXPECT_GT(input_energy, 10.f * output_energy); + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc 
new file mode 100644 index 0000000000..01cc33cb80 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/erl_estimator.h" + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +constexpr float kMinErl = 0.01f; +constexpr float kMaxErl = 1000.f; + +} // namespace + +ErlEstimator::ErlEstimator(size_t startup_phase_length_blocks_) + : startup_phase_length_blocks__(startup_phase_length_blocks_) { + erl_.fill(kMaxErl); + hold_counters_.fill(0); + erl_time_domain_ = kMaxErl; + hold_counter_time_domain_ = 0; +} + +ErlEstimator::~ErlEstimator() = default; + +void ErlEstimator::Reset() { + blocks_since_reset_ = 0; +} + +void ErlEstimator::Update( + const std::vector& converged_filters, + rtc::ArrayView> render_spectra, + rtc::ArrayView> + capture_spectra) { + const size_t num_capture_channels = converged_filters.size(); + RTC_DCHECK_EQ(capture_spectra.size(), num_capture_channels); + + // Corresponds to WGN of power -46 dBFS. + constexpr float kX2Min = 44015068.0f; + + const auto first_converged_iter = + std::find(converged_filters.begin(), converged_filters.end(), true); + const bool any_filter_converged = + first_converged_iter != converged_filters.end(); + + if (++blocks_since_reset_ < startup_phase_length_blocks__ || + !any_filter_converged) { + return; + } + + // Use the maximum spectrum across capture and the maximum across render. + std::array max_capture_spectrum_data; + std::array max_capture_spectrum = + capture_spectra[/*channel=*/0]; + if (num_capture_channels > 1) { + // Initialize using the first channel with a converged filter. + const size_t first_converged = + std::distance(converged_filters.begin(), first_converged_iter); + RTC_DCHECK_GE(first_converged, 0); + RTC_DCHECK_LT(first_converged, num_capture_channels); + max_capture_spectrum_data = capture_spectra[first_converged]; + + for (size_t ch = first_converged + 1; ch < num_capture_channels; ++ch) { + if (!converged_filters[ch]) { + continue; + } + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + max_capture_spectrum_data[k] = + std::max(max_capture_spectrum_data[k], capture_spectra[ch][k]); + } + } + max_capture_spectrum = max_capture_spectrum_data; + } + + const size_t num_render_channels = render_spectra.size(); + std::array max_render_spectrum_data; + rtc::ArrayView max_render_spectrum = + render_spectra[/*channel=*/0]; + if (num_render_channels > 1) { + std::copy(render_spectra[0].begin(), render_spectra[0].end(), + max_render_spectrum_data.begin()); + for (size_t ch = 1; ch < num_render_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + max_render_spectrum_data[k] = + std::max(max_render_spectrum_data[k], render_spectra[ch][k]); + } + } + max_render_spectrum = max_render_spectrum_data; + } + + const auto& X2 = max_render_spectrum; + const auto& Y2 = max_capture_spectrum; + + // Update the estimates in a maximum statistics manner. 
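+  // A summary of the logic below (editor's note): a lower instantaneous ERL
+  // Y2[k] / X2[k] is tracked with a 0.1 smoothing factor and re-arms a
+  // 1000-block hold counter; once the counter runs out, the estimate is
+  // released upwards by doubling once per block (about 3 dB per block),
+  // capped at kMaxErl.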
+ for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (X2[k] > kX2Min) { + const float new_erl = Y2[k] / X2[k]; + if (new_erl < erl_[k]) { + hold_counters_[k - 1] = 1000; + erl_[k] += 0.1f * (new_erl - erl_[k]); + erl_[k] = std::max(erl_[k], kMinErl); + } + } + } + + std::for_each(hold_counters_.begin(), hold_counters_.end(), + [](int& a) { --a; }); + std::transform(hold_counters_.begin(), hold_counters_.end(), erl_.begin() + 1, + erl_.begin() + 1, [](int a, float b) { + return a > 0 ? b : std::min(kMaxErl, 2.f * b); + }); + + erl_[0] = erl_[1]; + erl_[kFftLengthBy2] = erl_[kFftLengthBy2 - 1]; + + // Compute ERL over all frequency bins. + const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f); + + if (X2_sum > kX2Min * X2.size()) { + const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f); + const float new_erl = Y2_sum / X2_sum; + if (new_erl < erl_time_domain_) { + hold_counter_time_domain_ = 1000; + erl_time_domain_ += 0.1f * (new_erl - erl_time_domain_); + erl_time_domain_ = std::max(erl_time_domain_, kMinErl); + } + } + + --hold_counter_time_domain_; + erl_time_domain_ = (hold_counter_time_domain_ > 0) + ? erl_time_domain_ + : std::min(kMaxErl, 2.f * erl_time_domain_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.h new file mode 100644 index 0000000000..639a52c561 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_ + +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Estimates the echo return loss based on the signal spectra. +class ErlEstimator { + public: + explicit ErlEstimator(size_t startup_phase_length_blocks_); + ~ErlEstimator(); + + ErlEstimator(const ErlEstimator&) = delete; + ErlEstimator& operator=(const ErlEstimator&) = delete; + + // Resets the ERL estimation. + void Reset(); + + // Updates the ERL estimate. + void Update(const std::vector& converged_filters, + rtc::ArrayView> + render_spectra, + rtc::ArrayView> + capture_spectra); + + // Returns the most recent ERL estimate. + const std::array& Erl() const { return erl_; } + float ErlTimeDomain() const { return erl_time_domain_; } + + private: + const size_t startup_phase_length_blocks__; + std::array erl_; + std::array hold_counters_; + float erl_time_domain_; + int hold_counter_time_domain_; + size_t blocks_since_reset_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc new file mode 100644 index 0000000000..79e5465e3c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. 
All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/erl_estimator.h"
+
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+std::string ProduceDebugText(size_t num_render_channels,
+                             size_t num_capture_channels) {
+  rtc::StringBuilder ss;
+  ss << "Render channels: " << num_render_channels;
+  ss << ", Capture channels: " << num_capture_channels;
+  return ss.Release();
+}
+
+void VerifyErl(const std::array<float, kFftLengthBy2Plus1>& erl,
+               float erl_time_domain,
+               float reference) {
+  std::for_each(erl.begin(), erl.end(),
+                [reference](float a) { EXPECT_NEAR(reference, a, 0.001); });
+  EXPECT_NEAR(reference, erl_time_domain, 0.001);
+}
+
+}  // namespace
+
+class ErlEstimatorMultiChannel
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {};
+
+INSTANTIATE_TEST_SUITE_P(MultiChannel,
+                         ErlEstimatorMultiChannel,
+                         ::testing::Combine(::testing::Values(1, 2, 8),
+                                            ::testing::Values(1, 2, 8)));
+
+// Verifies that the correct ERL estimates are achieved.
+TEST_P(ErlEstimatorMultiChannel, Estimates) {
+  const size_t num_render_channels = std::get<0>(GetParam());
+  const size_t num_capture_channels = std::get<1>(GetParam());
+  SCOPED_TRACE(ProduceDebugText(num_render_channels, num_capture_channels));
+  std::vector<std::array<float, kFftLengthBy2Plus1>> X2(num_render_channels);
+  for (auto& X2_ch : X2) {
+    X2_ch.fill(0.f);
+  }
+  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
+  for (auto& Y2_ch : Y2) {
+    Y2_ch.fill(0.f);
+  }
+  std::vector<bool> converged_filters(num_capture_channels, false);
+  const size_t converged_idx = num_capture_channels - 1;
+  converged_filters[converged_idx] = true;
+
+  ErlEstimator estimator(0);
+
+  // Verifies that the ERL estimate is properly reduced to lower values.
+  for (auto& X2_ch : X2) {
+    X2_ch.fill(500 * 1000.f * 1000.f);
+  }
+  Y2[converged_idx].fill(10 * X2[0][0]);
+  for (size_t k = 0; k < 200; ++k) {
+    estimator.Update(converged_filters, X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 10.f);
+
+  // Verifies that the ERL is not immediately increased when the ERL in the
+  // data increases.
+  Y2[converged_idx].fill(10000 * X2[0][0]);
+  for (size_t k = 0; k < 998; ++k) {
+    estimator.Update(converged_filters, X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 10.f);
+
+  // Verifies that the rate of increase is 3 dB.
+  estimator.Update(converged_filters, X2, Y2);
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 20.f);
+
+  // Verifies that the maximum ERL is achieved when there are no low ERL
+  // estimates.
+  for (size_t k = 0; k < 1000; ++k) {
+    estimator.Update(converged_filters, X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 1000.f);
+
+  // Verifies that the ERL estimate is not updated for low-level signals.
+  for (auto& X2_ch : X2) {
+    X2_ch.fill(1000.f * 1000.f);
+  }
+  Y2[converged_idx].fill(10 * X2[0][0]);
+  for (size_t k = 0; k < 200; ++k) {
+    estimator.Update(converged_filters, X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 1000.f);
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc
new file mode 100644
index 0000000000..0e3d715c59
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/erle_estimator.h"
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks,
+                             const EchoCanceller3Config& config,
+                             size_t num_capture_channels)
+    : startup_phase_length_blocks_(startup_phase_length_blocks),
+      fullband_erle_estimator_(config.erle, num_capture_channels),
+      subband_erle_estimator_(config, num_capture_channels) {
+  if (config.erle.num_sections > 1) {
+    signal_dependent_erle_estimator_ =
+        std::make_unique<SignalDependentErleEstimator>(config,
+                                                       num_capture_channels);
+  }
+  Reset(true);
+}
+
+ErleEstimator::~ErleEstimator() = default;
+
+void ErleEstimator::Reset(bool delay_change) {
+  fullband_erle_estimator_.Reset();
+  subband_erle_estimator_.Reset();
+  if (signal_dependent_erle_estimator_) {
+    signal_dependent_erle_estimator_->Reset();
+  }
+  if (delay_change) {
+    blocks_since_reset_ = 0;
+  }
+}
+
+void ErleEstimator::Update(
+    const RenderBuffer& render_buffer,
+    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
+        filter_frequency_responses,
+    rtc::ArrayView<const float, kFftLengthBy2Plus1>
+        avg_render_spectrum_with_reverb,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        capture_spectra,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        subtractor_spectra,
+    const std::vector<bool>& converged_filters) {
+  RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
+                capture_spectra.size());
+  RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
+                subtractor_spectra.size());
+  const auto& X2_reverb = avg_render_spectrum_with_reverb;
+  const auto& Y2 = capture_spectra;
+  const auto& E2 = subtractor_spectra;
+
+  if (++blocks_since_reset_ < startup_phase_length_blocks_) {
+    return;
+  }
+
+  subband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
+
+  if (signal_dependent_erle_estimator_) {
+    signal_dependent_erle_estimator_->Update(
+        render_buffer, filter_frequency_responses, X2_reverb, Y2, E2,
+        subband_erle_estimator_.Erle(/*onset_compensated=*/false),
+        subband_erle_estimator_.Erle(/*onset_compensated=*/true),
+        converged_filters);
+  }
+
+  fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
+}
+
+void ErleEstimator::Dump(
+    const std::unique_ptr<ApmDataDumper>& data_dumper) const {
+  fullband_erle_estimator_.Dump(data_dumper);
+  subband_erle_estimator_.Dump(data_dumper);
+  if
(signal_dependent_erle_estimator_) { + signal_dependent_erle_estimator_->Dump(data_dumper); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.h new file mode 100644 index 0000000000..55797592a9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_ + +#include + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fullband_erle_estimator.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" +#include "modules/audio_processing/aec3/subband_erle_estimator.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// Estimates the echo return loss enhancement. One estimate is done per subband +// and another one is done using the aggreation of energy over all the subbands. +class ErleEstimator { + public: + ErleEstimator(size_t startup_phase_length_blocks, + const EchoCanceller3Config& config, + size_t num_capture_channels); + ~ErleEstimator(); + + // Resets the fullband ERLE estimator and the subbands ERLE estimators. + void Reset(bool delay_change); + + // Updates the ERLE estimates. + void Update( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses, + rtc::ArrayView + avg_render_spectrum_with_reverb, + rtc::ArrayView> + capture_spectra, + rtc::ArrayView> + subtractor_spectra, + const std::vector& converged_filters); + + // Returns the most recent subband ERLE estimates. + rtc::ArrayView> Erle( + bool onset_compensated) const { + return signal_dependent_erle_estimator_ + ? signal_dependent_erle_estimator_->Erle(onset_compensated) + : subband_erle_estimator_.Erle(onset_compensated); + } + + // Returns the non-capped subband ERLE. + rtc::ArrayView> ErleUnbounded() + const { + // Unbounded ERLE is only used with the subband erle estimator where the + // ERLE is often capped at low values. When the signal dependent ERLE + // estimator is used the capped ERLE is returned. + return !signal_dependent_erle_estimator_ + ? subband_erle_estimator_.ErleUnbounded() + : signal_dependent_erle_estimator_->Erle( + /*onset_compensated=*/false); + } + + // Returns the subband ERLE that are estimated during onsets (only used for + // testing). + rtc::ArrayView> ErleDuringOnsets() + const { + return subband_erle_estimator_.ErleDuringOnsets(); + } + + // Returns the fullband ERLE estimate. + float FullbandErleLog2() const { + return fullband_erle_estimator_.FullbandErleLog2(); + } + + // Returns an estimation of the current linear filter quality based on the + // current and past fullband ERLE estimates. 
The returned value is a float + // vector with content between 0 and 1 where 1 indicates that, at this current + // time instant, the linear filter is reaching its maximum subtraction + // performance. + rtc::ArrayView> GetInstLinearQualityEstimates() + const { + return fullband_erle_estimator_.GetInstLinearQualityEstimates(); + } + + void Dump(const std::unique_ptr& data_dumper) const; + + private: + const size_t startup_phase_length_blocks_; + FullBandErleEstimator fullband_erle_estimator_; + SubbandErleEstimator subband_erle_estimator_; + std::unique_ptr + signal_dependent_erle_estimator_; + size_t blocks_since_reset_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc new file mode 100644 index 0000000000..42be7d9c7d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/erle_estimator.h" + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { +constexpr int kLowFrequencyLimit = kFftLengthBy2 / 2; +constexpr float kTrueErle = 10.f; +constexpr float kTrueErleOnsets = 1.0f; +constexpr float kEchoPathGain = 3.f; + +void VerifyErleBands( + rtc::ArrayView> erle, + float reference_lf, + float reference_hf) { + for (size_t ch = 0; ch < erle.size(); ++ch) { + std::for_each( + erle[ch].begin(), erle[ch].begin() + kLowFrequencyLimit, + [reference_lf](float a) { EXPECT_NEAR(reference_lf, a, 0.001); }); + std::for_each( + erle[ch].begin() + kLowFrequencyLimit, erle[ch].end(), + [reference_hf](float a) { EXPECT_NEAR(reference_hf, a, 0.001); }); + } +} + +void VerifyErle( + rtc::ArrayView> erle, + float erle_time_domain, + float reference_lf, + float reference_hf) { + VerifyErleBands(erle, reference_lf, reference_hf); + EXPECT_NEAR(kTrueErle, erle_time_domain, 0.5); +} + +void VerifyErleGreaterOrEqual( + rtc::ArrayView> erle1, + rtc::ArrayView> erle2) { + for (size_t ch = 0; ch < erle1.size(); ++ch) { + for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) { + EXPECT_GE(erle1[ch][i], erle2[ch][i]); + } + } +} + +void FormFarendTimeFrame(Block* x) { + const std::array frame = { + 7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85, + 6665.52, 14808.6, 9342.3, 7483.28, 19261.7, 4145.98, 1622.18, 13475.2, + 7166.32, 6856.61, 21937, 7263.14, 9569.07, 14919, 8413.32, 7551.89, + 7848.65, 6011.27, 13080.6, 15865.2, 12656, 17459.6, 4263.93, 4503.03, + 9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6, + 11405, 15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8, + 1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4, + 12416.2, 16434, 2454.69, 9840.8, 6867.23, 1615.75, 6059.9, 
8394.19}; + for (int band = 0; band < x->NumBands(); ++band) { + for (int channel = 0; channel < x->NumChannels(); ++channel) { + RTC_DCHECK_GE(kBlockSize, frame.size()); + std::copy(frame.begin(), frame.end(), x->begin(band, channel)); + } + } +} + +void FormFarendFrame(const RenderBuffer& render_buffer, + float erle, + std::array* X2, + rtc::ArrayView> E2, + rtc::ArrayView> Y2) { + const auto& spectrum_buffer = render_buffer.GetSpectrumBuffer(); + const int num_render_channels = spectrum_buffer.buffer[0].size(); + const int num_capture_channels = Y2.size(); + + X2->fill(0.f); + for (int ch = 0; ch < num_render_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + (*X2)[k] += spectrum_buffer.buffer[spectrum_buffer.write][ch][k] / + num_render_channels; + } + } + + for (int ch = 0; ch < num_capture_channels; ++ch) { + std::transform(X2->begin(), X2->end(), Y2[ch].begin(), + [](float a) { return a * kEchoPathGain * kEchoPathGain; }); + std::transform(Y2[ch].begin(), Y2[ch].end(), E2[ch].begin(), + [erle](float a) { return a / erle; }); + } +} + +void FormNearendFrame( + Block* x, + std::array* X2, + rtc::ArrayView> E2, + rtc::ArrayView> Y2) { + for (int band = 0; band < x->NumBands(); ++band) { + for (int ch = 0; ch < x->NumChannels(); ++ch) { + std::fill(x->begin(band, ch), x->end(band, ch), 0.f); + } + } + + X2->fill(0.f); + for (size_t ch = 0; ch < Y2.size(); ++ch) { + Y2[ch].fill(500.f * 1000.f * 1000.f); + E2[ch].fill(Y2[ch][0]); + } +} + +void GetFilterFreq( + size_t delay_headroom_samples, + rtc::ArrayView>> + filter_frequency_response) { + const size_t delay_headroom_blocks = delay_headroom_samples / kBlockSize; + for (size_t ch = 0; ch < filter_frequency_response[0].size(); ++ch) { + for (auto& block_freq_resp : filter_frequency_response) { + block_freq_resp[ch].fill(0.f); + } + + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + filter_frequency_response[delay_headroom_blocks][ch][k] = kEchoPathGain; + } + } +} + +} // namespace + +class ErleEstimatorMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + ErleEstimatorMultiChannel, + ::testing::Combine(::testing::Values(1, 2, 4, 8), + ::testing::Values(1, 2, 8))); + +TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + std::array X2; + std::vector> E2(num_capture_channels); + std::vector> Y2(num_capture_channels); + std::vector converged_filters(num_capture_channels, true); + + EchoCanceller3Config config; + config.erle.onset_detection = true; + + Block x(kNumBands, num_render_channels); + std::vector>> + filter_frequency_response( + config.filter.refined.length_blocks, + std::vector>( + num_capture_channels)); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); + + GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response); + + ErleEstimator estimator(0, config, num_capture_channels); + + FormFarendTimeFrame(&x); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + // Verifies that the ERLE estimate is properly increased to higher values. 
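+  // The synthetic frames are built so that Y2 = kEchoPathGain^2 * X2 and
+  // E2 = Y2 / kTrueErle, i.e. a true echo return loss enhancement of 10
+  // (editor's restatement of FormFarendFrame above); the subband estimates
+  // are therefore expected to saturate at the config.erle.max_l / max_h caps.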
+ FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2, + Y2); + for (size_t k = 0; k < 1000; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, converged_filters); + } + VerifyErle(estimator.Erle(/*onset_compensated=*/true), + std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, + config.erle.max_h); + VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false), + estimator.Erle(/*onset_compensated=*/true)); + VerifyErleGreaterOrEqual(estimator.ErleUnbounded(), + estimator.Erle(/*onset_compensated=*/false)); + + FormNearendFrame(&x, &X2, E2, Y2); + // Verifies that the ERLE is not immediately decreased during nearend + // activity. + for (size_t k = 0; k < 50; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, converged_filters); + } + VerifyErle(estimator.Erle(/*onset_compensated=*/true), + std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, + config.erle.max_h); + VerifyErleGreaterOrEqual(estimator.Erle(/*onset_compensated=*/false), + estimator.Erle(/*onset_compensated=*/true)); + VerifyErleGreaterOrEqual(estimator.ErleUnbounded(), + estimator.Erle(/*onset_compensated=*/false)); +} + +TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + std::array X2; + std::vector> E2(num_capture_channels); + std::vector> Y2(num_capture_channels); + std::vector converged_filters(num_capture_channels, true); + EchoCanceller3Config config; + config.erle.onset_detection = true; + Block x(kNumBands, num_render_channels); + std::vector>> + filter_frequency_response( + config.filter.refined.length_blocks, + std::vector>( + num_capture_channels)); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); + + GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response); + + ErleEstimator estimator(/*startup_phase_length_blocks=*/0, config, + num_capture_channels); + + FormFarendTimeFrame(&x); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + + for (size_t burst = 0; burst < 20; ++burst) { + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErleOnsets, + &X2, E2, Y2); + for (size_t k = 0; k < 10; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, + converged_filters); + } + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2, + Y2); + for (size_t k = 0; k < 1000; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, + converged_filters); + } + FormNearendFrame(&x, &X2, E2, Y2); + for (size_t k = 0; k < 300; ++k) { + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, + converged_filters); + } + 
}
+  VerifyErleBands(estimator.ErleDuringOnsets(), config.erle.min,
+                  config.erle.min);
+  FormNearendFrame(&x, &X2, E2, Y2);
+  for (size_t k = 0; k < 1000; k++) {
+    estimator.Update(*render_delay_buffer->GetRenderBuffer(),
+                     filter_frequency_response, X2, Y2, E2, converged_filters);
+  }
+  // Verifies that during nearend activity, Erle converges to the Erle for
+  // onsets.
+  VerifyErle(estimator.Erle(/*onset_compensated=*/true),
+             std::pow(2.f, estimator.FullbandErleLog2()), config.erle.min,
+             config.erle.min);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc
new file mode 100644
index 0000000000..1ce2d31d8f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.cc
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/fft_buffer.h"
+
+namespace webrtc {
+
+FftBuffer::FftBuffer(size_t size, size_t num_channels)
+    : size(static_cast<int>(size)),
+      buffer(size, std::vector<FftData>(num_channels)) {
+  for (auto& block : buffer) {
+    for (auto& channel_fft_data : block) {
+      channel_fft_data.Clear();
+    }
+  }
+}
+
+FftBuffer::~FftBuffer() = default;
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.h
new file mode 100644
index 0000000000..4187315863
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_buffer.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
+
+#include
+
+#include
+
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Struct for bundling a circular buffer of FftData objects together with the
+// read and write indices.
+struct FftBuffer {
+  FftBuffer(size_t size, size_t num_channels);
+  ~FftBuffer();
+
+  int IncIndex(int index) const {
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return index < size - 1 ? index + 1 : 0;
+  }
+
+  int DecIndex(int index) const {
+    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
+    return index > 0 ?
index - 1 : size - 1; + } + + int OffsetIndex(int index, int offset) const { + RTC_DCHECK_GE(buffer.size(), offset); + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return (size + index + offset) % size; + } + + void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); } + void IncWriteIndex() { write = IncIndex(write); } + void DecWriteIndex() { write = DecIndex(write); } + void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); } + void IncReadIndex() { read = IncIndex(read); } + void DecReadIndex() { read = DecIndex(read); } + + const int size; + std::vector> buffer; + int write = 0; + int read = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_data.h b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data.h new file mode 100644 index 0000000000..9c25e784aa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_ + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Struct that holds imaginary data produced from 128 point real-valued FFTs. +struct FftData { + // Copies the data in src. + void Assign(const FftData& src) { + std::copy(src.re.begin(), src.re.end(), re.begin()); + std::copy(src.im.begin(), src.im.end(), im.begin()); + im[0] = im[kFftLengthBy2] = 0; + } + + // Clears all the imaginary. + void Clear() { + re.fill(0.f); + im.fill(0.f); + } + + // Computes the power spectrum of the data. + void SpectrumAVX2(rtc::ArrayView power_spectrum) const; + + // Computes the power spectrum of the data. + void Spectrum(Aec3Optimization optimization, + rtc::ArrayView power_spectrum) const { + RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size()); + switch (optimization) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: { + constexpr int kNumFourBinBands = kFftLengthBy2 / 4; + constexpr int kLimit = kNumFourBinBands * 4; + for (size_t k = 0; k < kLimit; k += 4) { + const __m128 r = _mm_loadu_ps(&re[k]); + const __m128 i = _mm_loadu_ps(&im[k]); + const __m128 ii = _mm_mul_ps(i, i); + const __m128 rr = _mm_mul_ps(r, r); + const __m128 rrii = _mm_add_ps(rr, ii); + _mm_storeu_ps(&power_spectrum[k], rrii); + } + power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] + + im[kFftLengthBy2] * im[kFftLengthBy2]; + } break; + case Aec3Optimization::kAvx2: + SpectrumAVX2(power_spectrum); + break; +#endif + default: + std::transform(re.begin(), re.end(), im.begin(), power_spectrum.begin(), + [](float a, float b) { return a * a + b * b; }); + } + } + + // Copy the data from an interleaved array. 
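+  // The packed layout matches a 128-point real FFT (editor's description of
+  // the loops below): v[0] holds the DC real part, v[1] the Nyquist (bin 64)
+  // real part, and bins 1..63 follow as interleaved re/im pairs, for
+  // 2 + 63 * 2 == kFftLength floats in total. Sketch of the round trip:
+  //
+  //   FftData d;
+  //   std::array<float, kFftLength> packed;
+  //   d.Clear();
+  //   d.CopyToPackedArray(&packed);
+  //   d.CopyFromPackedArray(packed);  // d is unchanged.
+  //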
+ void CopyFromPackedArray(const std::array& v) { + re[0] = v[0]; + re[kFftLengthBy2] = v[1]; + im[0] = im[kFftLengthBy2] = 0; + for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) { + re[k] = v[j++]; + im[k] = v[j++]; + } + } + + // Copies the data into an interleaved array. + void CopyToPackedArray(std::array* v) const { + RTC_DCHECK(v); + (*v)[0] = re[0]; + (*v)[1] = re[kFftLengthBy2]; + for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) { + (*v)[j++] = re[k]; + (*v)[j++] = im[k]; + } + } + + std::array re; + std::array im; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc new file mode 100644 index 0000000000..1fe4bd69c6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_avx2.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/fft_data.h" + +#include + +#include "api/array_view.h" + +namespace webrtc { + +// Computes the power spectrum of the data. +void FftData::SpectrumAVX2(rtc::ArrayView power_spectrum) const { + RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size()); + for (size_t k = 0; k < kFftLengthBy2; k += 8) { + __m256 r = _mm256_loadu_ps(&re[k]); + __m256 i = _mm256_loadu_ps(&im[k]); + __m256 ii = _mm256_mul_ps(i, i); + ii = _mm256_fmadd_ps(r, r, ii); + _mm256_storeu_ps(&power_spectrum[k], ii); + } + power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] + + im[kFftLengthBy2] * im[kFftLengthBy2]; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_gn/moz.build new file mode 100644 index 0000000000..d77163999b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("fft_data_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_unittest.cc new file mode 100644 index 0000000000..d76fabdbd6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/fft_data_unittest.cc @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/fft_data.h" + +#include "rtc_base/system/arch.h" +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" + +namespace webrtc { + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Verifies that the optimized methods are bitexact to their reference +// counterparts. +TEST(FftData, TestSse2Optimizations) { + if (GetCPUInfo(kSSE2) != 0) { + FftData x; + + for (size_t k = 0; k < x.re.size(); ++k) { + x.re[k] = k + 1; + } + + x.im[0] = x.im[x.im.size() - 1] = 0.f; + for (size_t k = 1; k < x.im.size() - 1; ++k) { + x.im[k] = 2.f * (k + 1); + } + + std::array spectrum; + std::array spectrum_sse2; + x.Spectrum(Aec3Optimization::kNone, spectrum); + x.Spectrum(Aec3Optimization::kSse2, spectrum_sse2); + EXPECT_EQ(spectrum, spectrum_sse2); + } +} + +// Verifies that the optimized methods are bitexact to their reference +// counterparts. 
+TEST(FftData, TestAvx2Optimizations) { + if (GetCPUInfo(kAVX2) != 0) { + FftData x; + + for (size_t k = 0; k < x.re.size(); ++k) { + x.re[k] = k + 1; + } + + x.im[0] = x.im[x.im.size() - 1] = 0.f; + for (size_t k = 1; k < x.im.size() - 1; ++k) { + x.im[k] = 2.f * (k + 1); + } + + std::array spectrum; + std::array spectrum_avx2; + x.Spectrum(Aec3Optimization::kNone, spectrum); + x.Spectrum(Aec3Optimization::kAvx2, spectrum_avx2); + EXPECT_EQ(spectrum, spectrum_avx2); + } +} +#endif + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for null output in CopyToPackedArray. +TEST(FftDataDeathTest, NonNullCopyToPackedArrayOutput) { + EXPECT_DEATH(FftData().CopyToPackedArray(nullptr), ""); +} + +// Verifies the check for null output in Spectrum. +TEST(FftDataDeathTest, NonNullSpectrumOutput) { + EXPECT_DEATH(FftData().Spectrum(Aec3Optimization::kNone, nullptr), ""); +} + +#endif + +// Verifies that the Assign method properly copies the data from the source and +// ensures that the imaginary components for the DC and Nyquist bins are 0. +TEST(FftData, Assign) { + FftData x; + FftData y; + + x.re.fill(1.f); + x.im.fill(2.f); + y.Assign(x); + EXPECT_EQ(x.re, y.re); + EXPECT_EQ(0.f, y.im[0]); + EXPECT_EQ(0.f, y.im[x.im.size() - 1]); + for (size_t k = 1; k < x.im.size() - 1; ++k) { + EXPECT_EQ(x.im[k], y.im[k]); + } +} + +// Verifies that the Clear method properly clears all the data. +TEST(FftData, Clear) { + FftData x_ref; + FftData x; + + x_ref.re.fill(0.f); + x_ref.im.fill(0.f); + + x.re.fill(1.f); + x.im.fill(2.f); + x.Clear(); + + EXPECT_EQ(x_ref.re, x.re); + EXPECT_EQ(x_ref.im, x.im); +} + +// Verifies that the spectrum is correctly computed. +TEST(FftData, Spectrum) { + FftData x; + + for (size_t k = 0; k < x.re.size(); ++k) { + x.re[k] = k + 1; + } + + x.im[0] = x.im[x.im.size() - 1] = 0.f; + for (size_t k = 1; k < x.im.size() - 1; ++k) { + x.im[k] = 2.f * (k + 1); + } + + std::array spectrum; + x.Spectrum(Aec3Optimization::kNone, spectrum); + + EXPECT_EQ(x.re[0] * x.re[0], spectrum[0]); + EXPECT_EQ(x.re[spectrum.size() - 1] * x.re[spectrum.size() - 1], + spectrum[spectrum.size() - 1]); + for (size_t k = 1; k < spectrum.size() - 1; ++k) { + EXPECT_EQ(x.re[k] * x.re[k] + x.im[k] * x.im[k], spectrum[k]); + } +} + +// Verifies that the functionality in CopyToPackedArray works as intended. +TEST(FftData, CopyToPackedArray) { + FftData x; + std::array x_packed; + + for (size_t k = 0; k < x.re.size(); ++k) { + x.re[k] = k + 1; + } + + x.im[0] = x.im[x.im.size() - 1] = 0.f; + for (size_t k = 1; k < x.im.size() - 1; ++k) { + x.im[k] = 2.f * (k + 1); + } + + x.CopyToPackedArray(&x_packed); + + EXPECT_EQ(x.re[0], x_packed[0]); + EXPECT_EQ(x.re[x.re.size() - 1], x_packed[1]); + for (size_t k = 1; k < x_packed.size() / 2; ++k) { + EXPECT_EQ(x.re[k], x_packed[2 * k]); + EXPECT_EQ(x.im[k], x_packed[2 * k + 1]); + } +} + +// Verifies that the functionality in CopyFromPackedArray works as intended +// (relies on that the functionality in CopyToPackedArray has been verified in +// the test above). 
+TEST(FftData, CopyFromPackedArray) { + FftData x_ref; + FftData x; + std::array x_packed; + + for (size_t k = 0; k < x_ref.re.size(); ++k) { + x_ref.re[k] = k + 1; + } + + x_ref.im[0] = x_ref.im[x_ref.im.size() - 1] = 0.f; + for (size_t k = 1; k < x_ref.im.size() - 1; ++k) { + x_ref.im[k] = 2.f * (k + 1); + } + + x_ref.CopyToPackedArray(&x_packed); + x.CopyFromPackedArray(x_packed); + + EXPECT_EQ(x_ref.re, x.re); + EXPECT_EQ(x_ref.im, x.im); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc new file mode 100644 index 0000000000..d8fd3aa275 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.cc @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/filter_analyzer.h" + +#include + +#include +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +size_t FindPeakIndex(rtc::ArrayView filter_time_domain, + size_t peak_index_in, + size_t start_sample, + size_t end_sample) { + size_t peak_index_out = peak_index_in; + float max_h2 = + filter_time_domain[peak_index_out] * filter_time_domain[peak_index_out]; + for (size_t k = start_sample; k <= end_sample; ++k) { + float tmp = filter_time_domain[k] * filter_time_domain[k]; + if (tmp > max_h2) { + peak_index_out = k; + max_h2 = tmp; + } + } + + return peak_index_out; +} + +} // namespace + +std::atomic FilterAnalyzer::instance_count_(0); + +FilterAnalyzer::FilterAnalyzer(const EchoCanceller3Config& config, + size_t num_capture_channels) + : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), + bounded_erl_(config.ep_strength.bounded_erl), + default_gain_(config.ep_strength.default_gain), + h_highpass_(num_capture_channels, + std::vector( + GetTimeDomainLength(config.filter.refined.length_blocks), + 0.f)), + filter_analysis_states_(num_capture_channels, + FilterAnalysisState(config)), + filter_delays_blocks_(num_capture_channels, 0) { + Reset(); +} + +FilterAnalyzer::~FilterAnalyzer() = default; + +void FilterAnalyzer::Reset() { + blocks_since_reset_ = 0; + ResetRegion(); + for (auto& state : filter_analysis_states_) { + state.Reset(default_gain_); + } + std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(), 0); +} + +void FilterAnalyzer::Update( + rtc::ArrayView> filters_time_domain, + const RenderBuffer& render_buffer, + bool* any_filter_consistent, + float* max_echo_path_gain) { + RTC_DCHECK(any_filter_consistent); + RTC_DCHECK(max_echo_path_gain); + RTC_DCHECK_EQ(filters_time_domain.size(), filter_analysis_states_.size()); + RTC_DCHECK_EQ(filters_time_domain.size(), h_highpass_.size()); + + ++blocks_since_reset_; + SetRegionToAnalyze(filters_time_domain[0].size()); + AnalyzeRegion(filters_time_domain, render_buffer); + + // Aggregate the results for all capture channels. 
+ auto& st_ch0 = filter_analysis_states_[0]; + *any_filter_consistent = st_ch0.consistent_estimate; + *max_echo_path_gain = st_ch0.gain; + min_filter_delay_blocks_ = filter_delays_blocks_[0]; + for (size_t ch = 1; ch < filters_time_domain.size(); ++ch) { + auto& st_ch = filter_analysis_states_[ch]; + *any_filter_consistent = + *any_filter_consistent || st_ch.consistent_estimate; + *max_echo_path_gain = std::max(*max_echo_path_gain, st_ch.gain); + min_filter_delay_blocks_ = + std::min(min_filter_delay_blocks_, filter_delays_blocks_[ch]); + } +} + +void FilterAnalyzer::AnalyzeRegion( + rtc::ArrayView> filters_time_domain, + const RenderBuffer& render_buffer) { + // Preprocess the filter to avoid issues with low-frequency components in the + // filter. + PreProcessFilters(filters_time_domain); + data_dumper_->DumpRaw("aec3_linear_filter_processed_td", h_highpass_[0]); + + constexpr float kOneByBlockSize = 1.f / kBlockSize; + for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) { + RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size()); + RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size()); + + auto& st_ch = filter_analysis_states_[ch]; + RTC_DCHECK_EQ(h_highpass_[ch].size(), filters_time_domain[ch].size()); + RTC_DCHECK_GT(h_highpass_[ch].size(), 0); + st_ch.peak_index = std::min(st_ch.peak_index, h_highpass_[ch].size() - 1); + + st_ch.peak_index = + FindPeakIndex(h_highpass_[ch], st_ch.peak_index, region_.start_sample_, + region_.end_sample_); + filter_delays_blocks_[ch] = st_ch.peak_index >> kBlockSizeLog2; + UpdateFilterGain(h_highpass_[ch], &st_ch); + st_ch.filter_length_blocks = + filters_time_domain[ch].size() * kOneByBlockSize; + + st_ch.consistent_estimate = st_ch.consistent_filter_detector.Detect( + h_highpass_[ch], region_, + render_buffer.GetBlock(-filter_delays_blocks_[ch]), st_ch.peak_index, + filter_delays_blocks_[ch]); + } +} + +void FilterAnalyzer::UpdateFilterGain( + rtc::ArrayView filter_time_domain, + FilterAnalysisState* st) { + bool sufficient_time_to_converge = + blocks_since_reset_ > 5 * kNumBlocksPerSecond; + + if (sufficient_time_to_converge && st->consistent_estimate) { + st->gain = fabsf(filter_time_domain[st->peak_index]); + } else { + // TODO(peah): Verify whether this check against a float is ok. + if (st->gain) { + st->gain = std::max(st->gain, fabsf(filter_time_domain[st->peak_index])); + } + } + + if (bounded_erl_ && st->gain) { + st->gain = std::max(st->gain, 0.01f); + } +} + +void FilterAnalyzer::PreProcessFilters( + rtc::ArrayView> filters_time_domain) { + for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) { + RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size()); + RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size()); + + RTC_DCHECK_GE(h_highpass_[ch].capacity(), filters_time_domain[ch].size()); + h_highpass_[ch].resize(filters_time_domain[ch].size()); + // Minimum phase high-pass filter with cutoff frequency at about 600 Hz. 
+ constexpr std::array h = { + {0.7929742f, -0.36072128f, -0.47047766f}}; + + std::fill(h_highpass_[ch].begin() + region_.start_sample_, + h_highpass_[ch].begin() + region_.end_sample_ + 1, 0.f); + float* h_highpass_ch = h_highpass_[ch].data(); + const float* filters_time_domain_ch = filters_time_domain[ch].data(); + const size_t region_end = region_.end_sample_; + for (size_t k = std::max(h.size() - 1, region_.start_sample_); + k <= region_end; ++k) { + float tmp = h_highpass_ch[k]; + for (size_t j = 0; j < h.size(); ++j) { + tmp += filters_time_domain_ch[k - j] * h[j]; + } + h_highpass_ch[k] = tmp; + } + } +} + +void FilterAnalyzer::ResetRegion() { + region_.start_sample_ = 0; + region_.end_sample_ = 0; +} + +void FilterAnalyzer::SetRegionToAnalyze(size_t filter_size) { + constexpr size_t kNumberBlocksToUpdate = 1; + auto& r = region_; + r.start_sample_ = r.end_sample_ >= filter_size - 1 ? 0 : r.end_sample_ + 1; + r.end_sample_ = + std::min(r.start_sample_ + kNumberBlocksToUpdate * kBlockSize - 1, + filter_size - 1); + + // Check range. + RTC_DCHECK_LT(r.start_sample_, filter_size); + RTC_DCHECK_LT(r.end_sample_, filter_size); + RTC_DCHECK_LE(r.start_sample_, r.end_sample_); +} + +FilterAnalyzer::ConsistentFilterDetector::ConsistentFilterDetector( + const EchoCanceller3Config& config) + : active_render_threshold_(config.render_levels.active_render_limit * + config.render_levels.active_render_limit * + kFftLengthBy2) { + Reset(); +} + +void FilterAnalyzer::ConsistentFilterDetector::Reset() { + significant_peak_ = false; + filter_floor_accum_ = 0.f; + filter_secondary_peak_ = 0.f; + filter_floor_low_limit_ = 0; + filter_floor_high_limit_ = 0; + consistent_estimate_counter_ = 0; + consistent_delay_reference_ = -10; +} + +bool FilterAnalyzer::ConsistentFilterDetector::Detect( + rtc::ArrayView filter_to_analyze, + const FilterRegion& region, + const Block& x_block, + size_t peak_index, + int delay_blocks) { + if (region.start_sample_ == 0) { + filter_floor_accum_ = 0.f; + filter_secondary_peak_ = 0.f; + filter_floor_low_limit_ = peak_index < 64 ? 0 : peak_index - 64; + filter_floor_high_limit_ = + peak_index > filter_to_analyze.size() - 129 ? 
0 : peak_index + 128; + } + + float filter_floor_accum = filter_floor_accum_; + float filter_secondary_peak = filter_secondary_peak_; + for (size_t k = region.start_sample_; + k < std::min(region.end_sample_ + 1, filter_floor_low_limit_); ++k) { + float abs_h = fabsf(filter_to_analyze[k]); + filter_floor_accum += abs_h; + filter_secondary_peak = std::max(filter_secondary_peak, abs_h); + } + + for (size_t k = std::max(filter_floor_high_limit_, region.start_sample_); + k <= region.end_sample_; ++k) { + float abs_h = fabsf(filter_to_analyze[k]); + filter_floor_accum += abs_h; + filter_secondary_peak = std::max(filter_secondary_peak, abs_h); + } + filter_floor_accum_ = filter_floor_accum; + filter_secondary_peak_ = filter_secondary_peak; + + if (region.end_sample_ == filter_to_analyze.size() - 1) { + float filter_floor = filter_floor_accum_ / + (filter_floor_low_limit_ + filter_to_analyze.size() - + filter_floor_high_limit_); + + float abs_peak = fabsf(filter_to_analyze[peak_index]); + significant_peak_ = abs_peak > 10.f * filter_floor && + abs_peak > 2.f * filter_secondary_peak_; + } + + if (significant_peak_) { + bool active_render_block = false; + for (int ch = 0; ch < x_block.NumChannels(); ++ch) { + rtc::ArrayView x_channel = + x_block.View(/*band=*/0, ch); + const float x_energy = std::inner_product( + x_channel.begin(), x_channel.end(), x_channel.begin(), 0.f); + if (x_energy > active_render_threshold_) { + active_render_block = true; + break; + } + } + + if (consistent_delay_reference_ == delay_blocks) { + if (active_render_block) { + ++consistent_estimate_counter_; + } + } else { + consistent_estimate_counter_ = 0; + consistent_delay_reference_ = delay_blocks; + } + } + return consistent_estimate_counter_ > 1.5f * kNumBlocksPerSecond; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.h b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.h new file mode 100644 index 0000000000..9aec8b14d7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_ + +#include + +#include +#include +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block.h" + +namespace webrtc { + +class ApmDataDumper; +class RenderBuffer; + +// Class for analyzing the properties of an adaptive filter. +class FilterAnalyzer { + public: + FilterAnalyzer(const EchoCanceller3Config& config, + size_t num_capture_channels); + ~FilterAnalyzer(); + + FilterAnalyzer(const FilterAnalyzer&) = delete; + FilterAnalyzer& operator=(const FilterAnalyzer&) = delete; + + // Resets the analysis. + void Reset(); + + // Updates the estimates with new input data. 
+ void Update(rtc::ArrayView> filters_time_domain, + const RenderBuffer& render_buffer, + bool* any_filter_consistent, + float* max_echo_path_gain); + + // Returns the delay in blocks for each filter. + rtc::ArrayView FilterDelaysBlocks() const { + return filter_delays_blocks_; + } + + // Returns the minimum delay of all filters in terms of blocks. + int MinFilterDelayBlocks() const { return min_filter_delay_blocks_; } + + // Returns the number of blocks for the current used filter. + int FilterLengthBlocks() const { + return filter_analysis_states_[0].filter_length_blocks; + } + + // Returns the preprocessed filter. + rtc::ArrayView> GetAdjustedFilters() const { + return h_highpass_; + } + + // Public for testing purposes only. + void SetRegionToAnalyze(size_t filter_size); + + private: + struct FilterAnalysisState; + + void AnalyzeRegion( + rtc::ArrayView> filters_time_domain, + const RenderBuffer& render_buffer); + + void UpdateFilterGain(rtc::ArrayView filters_time_domain, + FilterAnalysisState* st); + void PreProcessFilters( + rtc::ArrayView> filters_time_domain); + + void ResetRegion(); + + struct FilterRegion { + size_t start_sample_; + size_t end_sample_; + }; + + // This class checks whether the shape of the impulse response has been + // consistent over time. + class ConsistentFilterDetector { + public: + explicit ConsistentFilterDetector(const EchoCanceller3Config& config); + void Reset(); + bool Detect(rtc::ArrayView filter_to_analyze, + const FilterRegion& region, + const Block& x_block, + size_t peak_index, + int delay_blocks); + + private: + bool significant_peak_; + float filter_floor_accum_; + float filter_secondary_peak_; + size_t filter_floor_low_limit_; + size_t filter_floor_high_limit_; + const float active_render_threshold_; + size_t consistent_estimate_counter_ = 0; + int consistent_delay_reference_ = -10; + }; + + struct FilterAnalysisState { + explicit FilterAnalysisState(const EchoCanceller3Config& config) + : filter_length_blocks(config.filter.refined_initial.length_blocks), + consistent_filter_detector(config) { + Reset(config.ep_strength.default_gain); + } + + void Reset(float default_gain) { + peak_index = 0; + gain = default_gain; + consistent_filter_detector.Reset(); + } + + float gain; + size_t peak_index; + int filter_length_blocks; + bool consistent_estimate = false; + ConsistentFilterDetector consistent_filter_detector; + }; + + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + const bool bounded_erl_; + const float default_gain_; + std::vector> h_highpass_; + + size_t blocks_since_reset_ = 0; + FilterRegion region_; + + std::vector filter_analysis_states_; + std::vector filter_delays_blocks_; + + int min_filter_delay_blocks_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer_unittest.cc new file mode 100644 index 0000000000..f1e2e4c188 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/filter_analyzer_unittest.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/filter_analyzer.h" + +#include + +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +// Verifies that the filter analyzer handles filter resizes properly. +TEST(FilterAnalyzer, FilterResize) { + EchoCanceller3Config c; + std::vector filter(65, 0.f); + for (size_t num_capture_channels : {1, 2, 4}) { + FilterAnalyzer fa(c, num_capture_channels); + fa.SetRegionToAnalyze(filter.size()); + fa.SetRegionToAnalyze(filter.size()); + filter.resize(32); + fa.SetRegionToAnalyze(filter.size()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc new file mode 100644 index 0000000000..3039dcf7f1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.cc @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/frame_blocker.h" + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +FrameBlocker::FrameBlocker(size_t num_bands, size_t num_channels) + : num_bands_(num_bands), + num_channels_(num_channels), + buffer_(num_bands_, std::vector>(num_channels)) { + RTC_DCHECK_LT(0, num_bands); + RTC_DCHECK_LT(0, num_channels); + for (auto& band : buffer_) { + for (auto& channel : band) { + channel.reserve(kBlockSize); + RTC_DCHECK(channel.empty()); + } + } +} + +FrameBlocker::~FrameBlocker() = default; + +void FrameBlocker::InsertSubFrameAndExtractBlock( + const std::vector>>& sub_frame, + Block* block) { + RTC_DCHECK(block); + RTC_DCHECK_EQ(num_bands_, block->NumBands()); + RTC_DCHECK_EQ(num_bands_, sub_frame.size()); + for (size_t band = 0; band < num_bands_; ++band) { + RTC_DCHECK_EQ(num_channels_, block->NumChannels()); + RTC_DCHECK_EQ(num_channels_, sub_frame[band].size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + RTC_DCHECK_GE(kBlockSize - 16, buffer_[band][channel].size()); + RTC_DCHECK_EQ(kSubFrameLength, sub_frame[band][channel].size()); + const int samples_to_block = kBlockSize - buffer_[band][channel].size(); + std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(), + block->begin(band, channel)); + std::copy(sub_frame[band][channel].begin(), + sub_frame[band][channel].begin() + samples_to_block, + block->begin(band, channel) + kBlockSize - samples_to_block); + buffer_[band][channel].clear(); + buffer_[band][channel].insert( + buffer_[band][channel].begin(), + sub_frame[band][channel].begin() + samples_to_block, + sub_frame[band][channel].end()); + } + } +} + +bool FrameBlocker::IsBlockAvailable() const { + return kBlockSize == buffer_[0][0].size(); +} + +void FrameBlocker::ExtractBlock(Block* block) { + RTC_DCHECK(block); + RTC_DCHECK_EQ(num_bands_, block->NumBands()); + RTC_DCHECK_EQ(num_channels_, block->NumChannels()); + RTC_DCHECK(IsBlockAvailable()); + for (size_t band = 0; band < num_bands_; ++band) { + for (size_t channel = 0; channel < num_channels_; 
++channel) { + RTC_DCHECK_EQ(kBlockSize, buffer_[band][channel].size()); + std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(), + block->begin(band, channel)); + buffer_[band][channel].clear(); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.h b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.h new file mode 100644 index 0000000000..623c812157 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_ + +#include + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block.h" + +namespace webrtc { + +// Class for producing 64 sample multiband blocks from frames consisting of 2 +// subframes of 80 samples. +class FrameBlocker { + public: + FrameBlocker(size_t num_bands, size_t num_channels); + ~FrameBlocker(); + FrameBlocker(const FrameBlocker&) = delete; + FrameBlocker& operator=(const FrameBlocker&) = delete; + + // Inserts one 80 sample multiband subframe from the multiband frame and + // extracts one 64 sample multiband block. + void InsertSubFrameAndExtractBlock( + const std::vector>>& sub_frame, + Block* block); + // Reports whether a multiband block of 64 samples is available for + // extraction. + bool IsBlockAvailable() const; + // Extracts a multiband block of 64 samples. + void ExtractBlock(Block* block); + + private: + const size_t num_bands_; + const size_t num_channels_; + std::vector>> buffer_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc new file mode 100644 index 0000000000..92e393023a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc @@ -0,0 +1,425 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/frame_blocker.h" + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_framer.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +float ComputeSampleValue(size_t chunk_counter, + size_t chunk_size, + size_t band, + size_t channel, + size_t sample_index, + int offset) { + float value = + static_cast(chunk_counter * chunk_size + sample_index + channel) + + offset; + return value > 0 ? 
5000 * band + value : 0; +} + +void FillSubFrame(size_t sub_frame_counter, + int offset, + std::vector>>* sub_frame) { + for (size_t band = 0; band < sub_frame->size(); ++band) { + for (size_t channel = 0; channel < (*sub_frame)[band].size(); ++channel) { + for (size_t sample = 0; sample < (*sub_frame)[band][channel].size(); + ++sample) { + (*sub_frame)[band][channel][sample] = ComputeSampleValue( + sub_frame_counter, kSubFrameLength, band, channel, sample, offset); + } + } + } +} + +void FillSubFrameView( + size_t sub_frame_counter, + int offset, + std::vector>>* sub_frame, + std::vector>>* sub_frame_view) { + FillSubFrame(sub_frame_counter, offset, sub_frame); + for (size_t band = 0; band < sub_frame_view->size(); ++band) { + for (size_t channel = 0; channel < (*sub_frame_view)[band].size(); + ++channel) { + (*sub_frame_view)[band][channel] = rtc::ArrayView( + &(*sub_frame)[band][channel][0], (*sub_frame)[band][channel].size()); + } + } +} + +bool VerifySubFrame( + size_t sub_frame_counter, + int offset, + const std::vector>>& sub_frame_view) { + std::vector>> reference_sub_frame( + sub_frame_view.size(), + std::vector>( + sub_frame_view[0].size(), + std::vector(sub_frame_view[0][0].size(), 0.f))); + FillSubFrame(sub_frame_counter, offset, &reference_sub_frame); + for (size_t band = 0; band < sub_frame_view.size(); ++band) { + for (size_t channel = 0; channel < sub_frame_view[band].size(); ++channel) { + for (size_t sample = 0; sample < sub_frame_view[band][channel].size(); + ++sample) { + if (reference_sub_frame[band][channel][sample] != + sub_frame_view[band][channel][sample]) { + return false; + } + } + } + } + return true; +} + +bool VerifyBlock(size_t block_counter, int offset, const Block& block) { + for (int band = 0; band < block.NumBands(); ++band) { + for (int channel = 0; channel < block.NumChannels(); ++channel) { + for (size_t sample = 0; sample < kBlockSize; ++sample) { + auto it = block.begin(band, channel) + sample; + const float reference_value = ComputeSampleValue( + block_counter, kBlockSize, band, channel, sample, offset); + if (reference_value != *it) { + return false; + } + } + } + } + return true; +} + +// Verifies that the FrameBlocker properly forms blocks out of the frames. +void RunBlockerTest(int sample_rate_hz, size_t num_channels) { + constexpr size_t kNumSubFramesToProcess = 20; + const size_t num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_bands, num_channels); + std::vector>> input_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> input_sub_frame_view( + num_bands, std::vector>(num_channels)); + FrameBlocker blocker(num_bands, num_channels); + + size_t block_counter = 0; + for (size_t sub_frame_index = 0; sub_frame_index < kNumSubFramesToProcess; + ++sub_frame_index) { + FillSubFrameView(sub_frame_index, 0, &input_sub_frame, + &input_sub_frame_view); + + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block); + VerifyBlock(block_counter++, 0, block); + + if ((sub_frame_index + 1) % 4 == 0) { + EXPECT_TRUE(blocker.IsBlockAvailable()); + } else { + EXPECT_FALSE(blocker.IsBlockAvailable()); + } + if (blocker.IsBlockAvailable()) { + blocker.ExtractBlock(&block); + VerifyBlock(block_counter++, 0, block); + } + } +} + +// Verifies that the FrameBlocker and BlockFramer work well together and produce +// the expected output. 
+void RunBlockerAndFramerTest(int sample_rate_hz, size_t num_channels) { + const size_t kNumSubFramesToProcess = 20; + const size_t num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_bands, num_channels); + std::vector>> input_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> output_sub_frame_view( + num_bands, std::vector>(num_channels)); + std::vector>> input_sub_frame_view( + num_bands, std::vector>(num_channels)); + FrameBlocker blocker(num_bands, num_channels); + BlockFramer framer(num_bands, num_channels); + + for (size_t sub_frame_index = 0; sub_frame_index < kNumSubFramesToProcess; + ++sub_frame_index) { + FillSubFrameView(sub_frame_index, 0, &input_sub_frame, + &input_sub_frame_view); + FillSubFrameView(sub_frame_index, 0, &output_sub_frame, + &output_sub_frame_view); + + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block); + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view); + + if ((sub_frame_index + 1) % 4 == 0) { + EXPECT_TRUE(blocker.IsBlockAvailable()); + } else { + EXPECT_FALSE(blocker.IsBlockAvailable()); + } + if (blocker.IsBlockAvailable()) { + blocker.ExtractBlock(&block); + framer.InsertBlock(block); + } + if (sub_frame_index > 1) { + EXPECT_TRUE(VerifySubFrame(sub_frame_index, -64, output_sub_frame_view)); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the FrameBlocker crashes if the InsertSubFrameAndExtractBlock +// method is called for inputs with the wrong number of bands or band lengths. +void RunWronglySizedInsertAndExtractParametersTest( + int sample_rate_hz, + size_t correct_num_channels, + size_t num_block_bands, + size_t num_block_channels, + size_t num_sub_frame_bands, + size_t num_sub_frame_channels, + size_t sub_frame_length) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_block_bands, num_block_channels); + std::vector>> input_sub_frame( + num_sub_frame_bands, + std::vector>( + num_sub_frame_channels, std::vector(sub_frame_length, 0.f))); + std::vector>> input_sub_frame_view( + input_sub_frame.size(), + std::vector>(num_sub_frame_channels)); + FillSubFrameView(0, 0, &input_sub_frame, &input_sub_frame_view); + FrameBlocker blocker(correct_num_bands, correct_num_channels); + EXPECT_DEATH( + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block), ""); +} + +// Verifies that the FrameBlocker crashes if the ExtractBlock method is called +// for inputs with the wrong number of bands or band lengths. 
+void RunWronglySizedExtractParameterTest(int sample_rate_hz, + size_t correct_num_channels, + size_t num_block_bands, + size_t num_block_channels) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + Block correct_block(correct_num_bands, correct_num_channels); + Block wrong_block(num_block_bands, num_block_channels); + std::vector>> input_sub_frame( + correct_num_bands, + std::vector>( + correct_num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> input_sub_frame_view( + input_sub_frame.size(), + std::vector>(correct_num_channels)); + FillSubFrameView(0, 0, &input_sub_frame, &input_sub_frame_view); + FrameBlocker blocker(correct_num_bands, correct_num_channels); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + + EXPECT_DEATH(blocker.ExtractBlock(&wrong_block), ""); +} + +// Verifies that the FrameBlocker crashes if the ExtractBlock method is called +// after a wrong number of previous InsertSubFrameAndExtractBlock method calls +// have been made. +void RunWrongExtractOrderTest(int sample_rate_hz, + size_t num_channels, + size_t num_preceeding_api_calls) { + const size_t num_bands = NumBandsForRate(sample_rate_hz); + + Block block(num_bands, num_channels); + std::vector>> input_sub_frame( + num_bands, std::vector>( + num_channels, std::vector(kSubFrameLength, 0.f))); + std::vector>> input_sub_frame_view( + input_sub_frame.size(), std::vector>(num_channels)); + FillSubFrameView(0, 0, &input_sub_frame, &input_sub_frame_view); + FrameBlocker blocker(num_bands, num_channels); + for (size_t k = 0; k < num_preceeding_api_calls; ++k) { + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block); + } + + EXPECT_DEATH(blocker.ExtractBlock(&block), ""); +} +#endif + +std::string ProduceDebugText(int sample_rate_hz, size_t num_channels) { + rtc::StringBuilder ss; + ss << "Sample rate: " << sample_rate_hz; + ss << ", number of channels: " << num_channels; + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST(FrameBlockerDeathTest, + WrongNumberOfBandsInBlockForInsertSubFrameAndExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, wrong_num_bands, correct_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(FrameBlockerDeathTest, + WrongNumberOfChannelsInBlockForInsertSubFrameAndExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, wrong_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(FrameBlockerDeathTest, + WrongNumberOfBandsInSubFrameForInsertSubFrameAndExtractBlock) { + for (auto rate : 
{16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + wrong_num_bands, correct_num_channels, kSubFrameLength); + } + } +} + +TEST(FrameBlockerDeathTest, + WrongNumberOfChannelsInSubFrameForInsertSubFrameAndExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, wrong_num_channels, + correct_num_bands, wrong_num_channels, kSubFrameLength); + } + } +} + +TEST(FrameBlockerDeathTest, + WrongNumberOfSamplesInSubFrameForInsertSubFrameAndExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_channels, correct_num_bands, correct_num_channels, + correct_num_bands, correct_num_channels, kSubFrameLength - 1); + } + } +} + +TEST(FrameBlockerDeathTest, WrongNumberOfBandsInBlockForExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedExtractParameterTest( + rate, correct_num_channels, wrong_num_bands, correct_num_channels); + } + } +} + +TEST(FrameBlockerDeathTest, WrongNumberOfChannelsInBlockForExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t correct_num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, correct_num_channels)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_channels = correct_num_channels + 1; + RunWronglySizedExtractParameterTest( + rate, correct_num_channels, correct_num_bands, wrong_num_channels); + } + } +} + +TEST(FrameBlockerDeathTest, WrongNumberOfPreceedingApiCallsForExtractBlock) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_channels : {1, 2, 4, 8}) { + for (size_t num_calls = 0; num_calls < 4; ++num_calls) { + rtc::StringBuilder ss; + ss << "Sample rate: " << rate; + ss << "Num channels: " << num_channels; + ss << ", Num preceeding InsertSubFrameAndExtractBlock calls: " + << num_calls; + + SCOPED_TRACE(ss.str()); + RunWrongExtractOrderTest(rate, num_channels, num_calls); + } + } + } +} + +// Verifies that the verification for 0 number of channels works. +TEST(FrameBlockerDeathTest, ZeroNumberOfChannelsParameter) { + EXPECT_DEATH(FrameBlocker(16000, 0), ""); +} + +// Verifies that the verification for 0 number of bands works. +TEST(FrameBlockerDeathTest, ZeroNumberOfBandsParameter) { + EXPECT_DEATH(FrameBlocker(0, 1), ""); +} + +// Verifiers that the verification for null sub_frame pointer works. 
+TEST(FrameBlockerDeathTest, NullBlockParameter) { + std::vector>> sub_frame( + 1, std::vector>( + 1, std::vector(kSubFrameLength, 0.f))); + std::vector>> sub_frame_view( + sub_frame.size()); + FillSubFrameView(0, 0, &sub_frame, &sub_frame_view); + EXPECT_DEATH( + FrameBlocker(1, 1).InsertSubFrameAndExtractBlock(sub_frame_view, nullptr), + ""); +} + +#endif + +TEST(FrameBlocker, BlockBitexactness) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, num_channels)); + RunBlockerTest(rate, num_channels); + } + } +} + +TEST(FrameBlocker, BlockerAndFramer) { + for (auto rate : {16000, 32000, 48000}) { + for (size_t num_channels : {1, 2, 4, 8}) { + SCOPED_TRACE(ProduceDebugText(rate, num_channels)); + RunBlockerAndFramerTest(rate, num_channels); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc new file mode 100644 index 0000000000..e56674e4c9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/fullband_erle_estimator.h" + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace { +constexpr float kEpsilon = 1e-3f; +constexpr float kX2BandEnergyThreshold = 44015068.0f; +constexpr int kBlocksToHoldErle = 100; +constexpr int kPointsToAccumulate = 6; +} // namespace + +FullBandErleEstimator::FullBandErleEstimator( + const EchoCanceller3Config::Erle& config, + size_t num_capture_channels) + : min_erle_log2_(FastApproxLog2f(config.min + kEpsilon)), + max_erle_lf_log2_(FastApproxLog2f(config.max_l + kEpsilon)), + hold_counters_instantaneous_erle_(num_capture_channels, 0), + erle_time_domain_log2_(num_capture_channels, min_erle_log2_), + instantaneous_erle_(num_capture_channels, ErleInstantaneous(config)), + linear_filters_qualities_(num_capture_channels) { + Reset(); +} + +FullBandErleEstimator::~FullBandErleEstimator() = default; + +void FullBandErleEstimator::Reset() { + for (auto& instantaneous_erle_ch : instantaneous_erle_) { + instantaneous_erle_ch.Reset(); + } + + UpdateQualityEstimates(); + std::fill(erle_time_domain_log2_.begin(), erle_time_domain_log2_.end(), + min_erle_log2_); + std::fill(hold_counters_instantaneous_erle_.begin(), + hold_counters_instantaneous_erle_.end(), 0); +} + +void FullBandErleEstimator::Update( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters) { + for (size_t ch = 0; ch < Y2.size(); ++ch) { + if (converged_filters[ch]) { + // Computes the fullband ERLE. 
+ const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f); + if (X2_sum > kX2BandEnergyThreshold * X2.size()) { + const float Y2_sum = + std::accumulate(Y2[ch].begin(), Y2[ch].end(), 0.0f); + const float E2_sum = + std::accumulate(E2[ch].begin(), E2[ch].end(), 0.0f); + if (instantaneous_erle_[ch].Update(Y2_sum, E2_sum)) { + hold_counters_instantaneous_erle_[ch] = kBlocksToHoldErle; + erle_time_domain_log2_[ch] += + 0.05f * ((instantaneous_erle_[ch].GetInstErleLog2().value()) - + erle_time_domain_log2_[ch]); + erle_time_domain_log2_[ch] = + std::max(erle_time_domain_log2_[ch], min_erle_log2_); + } + } + } + --hold_counters_instantaneous_erle_[ch]; + if (hold_counters_instantaneous_erle_[ch] == 0) { + instantaneous_erle_[ch].ResetAccumulators(); + } + } + + UpdateQualityEstimates(); +} + +void FullBandErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + data_dumper->DumpRaw("aec3_fullband_erle_log2", FullbandErleLog2()); + instantaneous_erle_[0].Dump(data_dumper); +} + +void FullBandErleEstimator::UpdateQualityEstimates() { + for (size_t ch = 0; ch < instantaneous_erle_.size(); ++ch) { + linear_filters_qualities_[ch] = + instantaneous_erle_[ch].GetQualityEstimate(); + } +} + +FullBandErleEstimator::ErleInstantaneous::ErleInstantaneous( + const EchoCanceller3Config::Erle& config) + : clamp_inst_quality_to_zero_(config.clamp_quality_estimate_to_zero), + clamp_inst_quality_to_one_(config.clamp_quality_estimate_to_one) { + Reset(); +} + +FullBandErleEstimator::ErleInstantaneous::~ErleInstantaneous() = default; + +bool FullBandErleEstimator::ErleInstantaneous::Update(const float Y2_sum, + const float E2_sum) { + bool update_estimates = false; + E2_acum_ += E2_sum; + Y2_acum_ += Y2_sum; + num_points_++; + if (num_points_ == kPointsToAccumulate) { + if (E2_acum_ > 0.f) { + update_estimates = true; + erle_log2_ = FastApproxLog2f(Y2_acum_ / E2_acum_ + kEpsilon); + } + num_points_ = 0; + E2_acum_ = 0.f; + Y2_acum_ = 0.f; + } + + if (update_estimates) { + UpdateMaxMin(); + UpdateQualityEstimate(); + } + return update_estimates; +} + +void FullBandErleEstimator::ErleInstantaneous::Reset() { + ResetAccumulators(); + max_erle_log2_ = -10.f; // -30 dB. + min_erle_log2_ = 33.f; // 100 dB. + inst_quality_estimate_ = 0.f; +} + +void FullBandErleEstimator::ErleInstantaneous::ResetAccumulators() { + erle_log2_ = absl::nullopt; + inst_quality_estimate_ = 0.f; + num_points_ = 0; + E2_acum_ = 0.f; + Y2_acum_ = 0.f; +} + +void FullBandErleEstimator::ErleInstantaneous::Dump( + const std::unique_ptr& data_dumper) const { + data_dumper->DumpRaw("aec3_fullband_erle_inst_log2", + erle_log2_ ? *erle_log2_ : -10.f); + data_dumper->DumpRaw( + "aec3_erle_instantaneous_quality", + GetQualityEstimate() ? GetQualityEstimate().value() : 0.f); + data_dumper->DumpRaw("aec3_fullband_erle_max_log2", max_erle_log2_); + data_dumper->DumpRaw("aec3_fullband_erle_min_log2", min_erle_log2_); +} + +void FullBandErleEstimator::ErleInstantaneous::UpdateMaxMin() { + RTC_DCHECK(erle_log2_); + // Adding the forgetting factors for the maximum and minimum and capping the + // result to the incoming value. + max_erle_log2_ -= 0.0004f; // Forget factor, approx 1dB every 3 sec. + max_erle_log2_ = std::max(max_erle_log2_, erle_log2_.value()); + min_erle_log2_ += 0.0004f; // Forget factor, approx 1dB every 3 sec. 
+ min_erle_log2_ = std::min(min_erle_log2_, erle_log2_.value()); +} + +void FullBandErleEstimator::ErleInstantaneous::UpdateQualityEstimate() { + const float alpha = 0.07f; + float quality_estimate = 0.f; + RTC_DCHECK(erle_log2_); + // TODO(peah): Currently, the estimate can become be less than 0; this should + // be corrected. + if (max_erle_log2_ > min_erle_log2_) { + quality_estimate = (erle_log2_.value() - min_erle_log2_) / + (max_erle_log2_ - min_erle_log2_); + } + if (quality_estimate > inst_quality_estimate_) { + inst_quality_estimate_ = quality_estimate; + } else { + inst_quality_estimate_ += + alpha * (quality_estimate - inst_quality_estimate_); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.h new file mode 100644 index 0000000000..7a082176d6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/fullband_erle_estimator.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// Estimates the echo return loss enhancement using the energy of all the +// freuquency bands. +class FullBandErleEstimator { + public: + FullBandErleEstimator(const EchoCanceller3Config::Erle& config, + size_t num_capture_channels); + ~FullBandErleEstimator(); + // Resets the ERLE estimator. + void Reset(); + + // Updates the ERLE estimator. + void Update(rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters); + + // Returns the fullband ERLE estimates in log2 units. + float FullbandErleLog2() const { + float min_erle = erle_time_domain_log2_[0]; + for (size_t ch = 1; ch < erle_time_domain_log2_.size(); ++ch) { + min_erle = std::min(min_erle, erle_time_domain_log2_[ch]); + } + return min_erle; + } + + // Returns an estimation of the current linear filter quality. It returns a + // float number between 0 and 1 mapping 1 to the highest possible quality. + rtc::ArrayView> GetInstLinearQualityEstimates() + const { + return linear_filters_qualities_; + } + + void Dump(const std::unique_ptr& data_dumper) const; + + private: + void UpdateQualityEstimates(); + + class ErleInstantaneous { + public: + explicit ErleInstantaneous(const EchoCanceller3Config::Erle& config); + ~ErleInstantaneous(); + + // Updates the estimator with a new point, returns true + // if the instantaneous ERLE was updated due to having enough + // points for performing the estimate. + bool Update(float Y2_sum, float E2_sum); + // Resets the instantaneous ERLE estimator to its initial state. + void Reset(); + // Resets the members related with an instantaneous estimate. + void ResetAccumulators(); + // Returns the instantaneous ERLE in log2 units. 
+ absl::optional GetInstErleLog2() const { return erle_log2_; } + // Gets an indication between 0 and 1 of the performance of the linear + // filter for the current time instant. + absl::optional GetQualityEstimate() const { + if (erle_log2_) { + float value = inst_quality_estimate_; + if (clamp_inst_quality_to_zero_) { + value = std::max(0.f, value); + } + if (clamp_inst_quality_to_one_) { + value = std::min(1.f, value); + } + return absl::optional(value); + } + return absl::nullopt; + } + void Dump(const std::unique_ptr& data_dumper) const; + + private: + void UpdateMaxMin(); + void UpdateQualityEstimate(); + const bool clamp_inst_quality_to_zero_; + const bool clamp_inst_quality_to_one_; + absl::optional erle_log2_; + float inst_quality_estimate_; + float max_erle_log2_; + float min_erle_log2_; + float Y2_acum_; + float E2_acum_; + int num_points_; + }; + + const float min_erle_log2_; + const float max_erle_lf_log2_; + std::vector hold_counters_instantaneous_erle_; + std::vector erle_time_domain_log2_; + std::vector instantaneous_erle_; + std::vector> linear_filters_qualities_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc new file mode 100644 index 0000000000..af30ff1b9f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.cc @@ -0,0 +1,900 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/matched_filter.h" + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_HAS_NEON) +#include +#endif +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif +#include +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace { + +// Subsample rate used for computing the accumulated error. +// The implementation of some core functions depends on this constant being +// equal to 4. 
+
+namespace {
+
+// Subsample rate used for computing the accumulated error.
+// The implementation of some core functions depends on this constant being
+// equal to 4.
+constexpr int kAccumulatedErrorSubSampleRate = 4;
+
+void UpdateAccumulatedError(
+    const rtc::ArrayView<const float> instantaneous_accumulated_error,
+    const rtc::ArrayView<float> accumulated_error,
+    float one_over_error_sum_anchor,
+    float smooth_constant_increases) {
+  for (size_t k = 0; k < instantaneous_accumulated_error.size(); ++k) {
+    float error_norm =
+        instantaneous_accumulated_error[k] * one_over_error_sum_anchor;
+    if (error_norm < accumulated_error[k]) {
+      accumulated_error[k] = error_norm;
+    } else {
+      accumulated_error[k] +=
+          smooth_constant_increases * (error_norm - accumulated_error[k]);
+    }
+  }
+}
+
+size_t ComputePreEchoLag(
+    const webrtc::MatchedFilter::PreEchoConfiguration& pre_echo_configuration,
+    const rtc::ArrayView<const float> accumulated_error,
+    size_t lag,
+    size_t alignment_shift_winner) {
+  RTC_DCHECK_GE(lag, alignment_shift_winner);
+  size_t pre_echo_lag_estimate = lag - alignment_shift_winner;
+  size_t maximum_pre_echo_lag =
+      std::min(pre_echo_lag_estimate / kAccumulatedErrorSubSampleRate,
+               accumulated_error.size());
+  switch (pre_echo_configuration.mode) {
+    case 0:
+      // Mode 0: Pre echo lag is defined as the first coefficient with an error
+      // lower than a threshold with a certain decrease slope.
+      for (size_t k = 1; k < maximum_pre_echo_lag; ++k) {
+        if (accumulated_error[k] <
+                pre_echo_configuration.threshold * accumulated_error[k - 1] &&
+            accumulated_error[k] < pre_echo_configuration.threshold) {
+          pre_echo_lag_estimate = (k + 1) * kAccumulatedErrorSubSampleRate - 1;
+          break;
+        }
+      }
+      break;
+    case 1:
+      // Mode 1: Pre echo lag is defined as the first coefficient with an error
+      // lower than a certain threshold.
+      for (size_t k = 0; k < maximum_pre_echo_lag; ++k) {
+        if (accumulated_error[k] < pre_echo_configuration.threshold) {
+          pre_echo_lag_estimate = (k + 1) * kAccumulatedErrorSubSampleRate - 1;
+          break;
+        }
+      }
+      break;
+    case 2:
+    case 3:
+      // Mode 2,3: Pre echo lag is defined as the closest coefficient to the
+      // lag with an error lower than a certain threshold.
+      for (int k = static_cast<int>(maximum_pre_echo_lag) - 1; k >= 0; --k) {
+        if (accumulated_error[k] > pre_echo_configuration.threshold) {
+          break;
+        }
+        pre_echo_lag_estimate = (k + 1) * kAccumulatedErrorSubSampleRate - 1;
+      }
+      break;
+    default:
+      RTC_DCHECK_NOTREACHED();
+      break;
+  }
+  return pre_echo_lag_estimate + alignment_shift_winner;
+}
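ComputePreEchoLag scans the subsampled, normalized accumulated error for the earliest tap where the echo path already contributes. Below is a hedged, self-contained sketch of the mode-1 scan only (the real function above also handles modes 0, 2, and 3); all names are local to this sketch:

```cpp
#include <cstddef>
#include <vector>

// Returns the first tap whose normalized accumulated error drops below the
// threshold, mapped from the subsampled index back to a tap index.
size_t FirstTapBelowThreshold(const std::vector<float>& accumulated_error,
                              float threshold, size_t fallback_lag) {
  constexpr size_t kSubSampleRate = 4;  // kAccumulatedErrorSubSampleRate.
  for (size_t k = 0; k < accumulated_error.size(); ++k) {
    if (accumulated_error[k] < threshold) {
      return (k + 1) * kSubSampleRate - 1;
    }
  }
  return fallback_lag;  // No early tap found; keep the matched filter lag.
}
```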
+
+webrtc::MatchedFilter::PreEchoConfiguration FetchPreEchoConfiguration() {
+  float threshold = 0.5f;
+  int mode = 0;
+  const std::string pre_echo_configuration_field_trial =
+      webrtc::field_trial::FindFullName("WebRTC-Aec3PreEchoConfiguration");
+  webrtc::FieldTrialParameter<double> threshold_field_trial_parameter(
+      /*key=*/"threshold", /*default_value=*/threshold);
+  webrtc::FieldTrialParameter<int> mode_field_trial_parameter(
+      /*key=*/"mode", /*default_value=*/mode);
+  webrtc::ParseFieldTrial(
+      {&threshold_field_trial_parameter, &mode_field_trial_parameter},
+      pre_echo_configuration_field_trial);
+  float threshold_read =
+      static_cast<float>(threshold_field_trial_parameter.Get());
+  int mode_read = mode_field_trial_parameter.Get();
+  if (threshold_read < 1.0f && threshold_read > 0.0f) {
+    threshold = threshold_read;
+  } else {
+    RTC_LOG(LS_ERROR)
+        << "AEC3: Pre echo configuration: wrong input, threshold = "
+        << threshold_read << ".";
+  }
+  if (mode_read >= 0 && mode_read <= 3) {
+    mode = mode_read;
+  } else {
+    RTC_LOG(LS_ERROR) << "AEC3: Pre echo configuration: wrong input, mode = "
+                      << mode_read << ".";
+  }
+  RTC_LOG(LS_INFO) << "AEC3: Pre echo configuration: threshold = " << threshold
+                   << ", mode = " << mode << ".";
+  return {.threshold = threshold, .mode = mode};
+}
+
+}  // namespace
+
+namespace webrtc {
+namespace aec3 {
+
+#if defined(WEBRTC_HAS_NEON)
+
+inline float SumAllElements(float32x4_t elements) {
+  float32x2_t sum = vpadd_f32(vget_low_f32(elements), vget_high_f32(elements));
+  sum = vpadd_f32(sum, sum);
+  return vget_lane_f32(sum, 0);
+}
+
+void MatchedFilterCoreWithAccumulatedError_NEON(
+    size_t x_start_index,
+    float x2_sum_threshold,
+    float smoothing,
+    rtc::ArrayView<const float> x,
+    rtc::ArrayView<const float> y,
+    rtc::ArrayView<float> h,
+    bool* filters_updated,
+    float* error_sum,
+    rtc::ArrayView<float> accumulated_error,
+    rtc::ArrayView<float> scratch_memory) {
+  const int h_size = static_cast<int>(h.size());
+  const int x_size = static_cast<int>(x.size());
+  RTC_DCHECK_EQ(0, h_size % 4);
+  std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f);
+  // Process for all samples in the sub-block.
+  for (size_t i = 0; i < y.size(); ++i) {
+    // Apply the matched filter as filter * x, and compute x * x.
+    RTC_DCHECK_GT(x_size, x_start_index);
+    // Compute loop chunk sizes until, and after, the wraparound of the
+    // circular buffer for x.
+    const int chunk1 =
+        std::min(h_size, static_cast<int>(x_size - x_start_index));
+    if (chunk1 != h_size) {
+      const int chunk2 = h_size - chunk1;
+      std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin());
+      std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1);
+    }
+    const float* x_p =
+        chunk1 != h_size ? scratch_memory.data() : &x[x_start_index];
+    const float* h_p = &h[0];
+    float* accumulated_error_p = &accumulated_error[0];
+    // Initialize values for the accumulation.
+    float32x4_t x2_sum_128 = vdupq_n_f32(0);
+    float x2_sum = 0.f;
+    float s = 0;
+    // Perform 128 bit vector operations.
+    const int limit_by_4 = h_size >> 2;
+    for (int k = limit_by_4; k > 0;
+         --k, h_p += 4, x_p += 4, accumulated_error_p++) {
+      // Load the data into 128 bit vectors.
+ const float32x4_t x_k = vld1q_f32(x_p); + const float32x4_t h_k = vld1q_f32(h_p); + // Compute and accumulate x * x. + x2_sum_128 = vmlaq_f32(x2_sum_128, x_k, x_k); + // Compute x * h + float32x4_t hk_xk_128 = vmulq_f32(h_k, x_k); + s += SumAllElements(hk_xk_128); + const float e = s - y[i]; + accumulated_error_p[0] += e * e; + } + // Combine the accumulated vector and scalar values. + x2_sum += SumAllElements(x2_sum_128); + // Compute the matched filter error. + float e = y[i] - s; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const float32x4_t alpha_128 = vmovq_n_f32(alpha); + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + x_p = chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; + // Perform 128 bit vector operations. + const int limit_by_4 = h_size >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + float32x4_t h_k = vld1q_f32(h_p); + const float32x4_t x_k = vld1q_f32(x_p); + // Compute h = h + alpha * x. + h_k = vmlaq_f32(h_k, alpha_128, x_k); + // Store the result. + vst1q_f32(h_p, h_k); + } + *filters_updated = true; + } + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +void MatchedFilterCore_NEON(size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + bool compute_accumulated_error, + rtc::ArrayView accumulated_error, + rtc::ArrayView scratch_memory) { + const int h_size = static_cast(h.size()); + const int x_size = static_cast(x.size()); + RTC_DCHECK_EQ(0, h_size % 4); + + if (compute_accumulated_error) { + return MatchedFilterCoreWithAccumulatedError_NEON( + x_start_index, x2_sum_threshold, smoothing, x, y, h, filters_updated, + error_sum, accumulated_error, scratch_memory); + } + + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + + RTC_DCHECK_GT(x_size, x_start_index); + const float* x_p = &x[x_start_index]; + const float* h_p = &h[0]; + + // Initialize values for the accumulation. + float32x4_t s_128 = vdupq_n_f32(0); + float32x4_t x2_sum_128 = vdupq_n_f32(0); + float x2_sum = 0.f; + float s = 0; + + // Compute loop chunk sizes until, and after, the wraparound of the circular + // buffer for x. + const int chunk1 = + std::min(h_size, static_cast(x_size - x_start_index)); + + // Perform the loop in two chunks. + const int chunk2 = h_size - chunk1; + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_4 = limit >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + const float32x4_t x_k = vld1q_f32(x_p); + const float32x4_t h_k = vld1q_f32(h_p); + // Compute and accumulate x * x and h * x. + x2_sum_128 = vmlaq_f32(x2_sum_128, x_k, x_k); + s_128 = vmlaq_f32(s_128, h_k, x_k); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { + const float x_k = *x_p; + x2_sum += x_k * x_k; + s += *h_p * x_k; + } + + x_p = &x[0]; + } + + // Combine the accumulated vector and scalar values. 
+ s += SumAllElements(s_128); + x2_sum += SumAllElements(x2_sum_128); + + // Compute the matched filter error. + float e = y[i] - s; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const float32x4_t alpha_128 = vmovq_n_f32(alpha); + + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + x_p = &x[x_start_index]; + + // Perform the loop in two chunks. + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_4 = limit >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + float32x4_t h_k = vld1q_f32(h_p); + const float32x4_t x_k = vld1q_f32(x_p); + // Compute h = h + alpha * x. + h_k = vmlaq_f32(h_k, alpha_128, x_k); + + // Store the result. + vst1q_f32(h_p, h_k); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { + *h_p += alpha * *x_p; + } + + x_p = &x[0]; + } + + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) + +void MatchedFilterCore_AccumulatedError_SSE2( + size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + rtc::ArrayView accumulated_error, + rtc::ArrayView scratch_memory) { + const int h_size = static_cast(h.size()); + const int x_size = static_cast(x.size()); + RTC_DCHECK_EQ(0, h_size % 8); + std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f); + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + RTC_DCHECK_GT(x_size, x_start_index); + const int chunk1 = + std::min(h_size, static_cast(x_size - x_start_index)); + if (chunk1 != h_size) { + const int chunk2 = h_size - chunk1; + std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); + std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); + } + const float* x_p = + chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; + const float* h_p = &h[0]; + float* a_p = &accumulated_error[0]; + __m128 s_inst_128; + __m128 s_inst_128_4; + __m128 x2_sum_128 = _mm_set1_ps(0); + __m128 x2_sum_128_4 = _mm_set1_ps(0); + __m128 e_128; + float* const s_p = reinterpret_cast(&s_inst_128); + float* const s_4_p = reinterpret_cast(&s_inst_128_4); + float* const e_p = reinterpret_cast(&e_128); + float x2_sum = 0.0f; + float s_acum = 0; + // Perform 128 bit vector operations. + const int limit_by_8 = h_size >> 3; + for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8, a_p += 2) { + // Load the data into 128 bit vectors. + const __m128 x_k = _mm_loadu_ps(x_p); + const __m128 h_k = _mm_loadu_ps(h_p); + const __m128 x_k_4 = _mm_loadu_ps(x_p + 4); + const __m128 h_k_4 = _mm_loadu_ps(h_p + 4); + const __m128 xx = _mm_mul_ps(x_k, x_k); + const __m128 xx_4 = _mm_mul_ps(x_k_4, x_k_4); + // Compute and accumulate x * x and h * x. 
+ x2_sum_128 = _mm_add_ps(x2_sum_128, xx); + x2_sum_128_4 = _mm_add_ps(x2_sum_128_4, xx_4); + s_inst_128 = _mm_mul_ps(h_k, x_k); + s_inst_128_4 = _mm_mul_ps(h_k_4, x_k_4); + s_acum += s_p[0] + s_p[1] + s_p[2] + s_p[3]; + e_p[0] = s_acum - y[i]; + s_acum += s_4_p[0] + s_4_p[1] + s_4_p[2] + s_4_p[3]; + e_p[1] = s_acum - y[i]; + a_p[0] += e_p[0] * e_p[0]; + a_p[1] += e_p[1] * e_p[1]; + } + // Combine the accumulated vector and scalar values. + x2_sum_128 = _mm_add_ps(x2_sum_128, x2_sum_128_4); + float* v = reinterpret_cast(&x2_sum_128); + x2_sum += v[0] + v[1] + v[2] + v[3]; + // Compute the matched filter error. + float e = y[i] - s_acum; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const __m128 alpha_128 = _mm_set1_ps(alpha); + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + const float* x_p = + chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; + // Perform 128 bit vector operations. + const int limit_by_4 = h_size >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + __m128 h_k = _mm_loadu_ps(h_p); + const __m128 x_k = _mm_loadu_ps(x_p); + // Compute h = h + alpha * x. + const __m128 alpha_x = _mm_mul_ps(alpha_128, x_k); + h_k = _mm_add_ps(h_k, alpha_x); + // Store the result. + _mm_storeu_ps(h_p, h_k); + } + *filters_updated = true; + } + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +void MatchedFilterCore_SSE2(size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + bool compute_accumulated_error, + rtc::ArrayView accumulated_error, + rtc::ArrayView scratch_memory) { + if (compute_accumulated_error) { + return MatchedFilterCore_AccumulatedError_SSE2( + x_start_index, x2_sum_threshold, smoothing, x, y, h, filters_updated, + error_sum, accumulated_error, scratch_memory); + } + const int h_size = static_cast(h.size()); + const int x_size = static_cast(x.size()); + RTC_DCHECK_EQ(0, h_size % 4); + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + RTC_DCHECK_GT(x_size, x_start_index); + const float* x_p = &x[x_start_index]; + const float* h_p = &h[0]; + // Initialize values for the accumulation. + __m128 s_128 = _mm_set1_ps(0); + __m128 s_128_4 = _mm_set1_ps(0); + __m128 x2_sum_128 = _mm_set1_ps(0); + __m128 x2_sum_128_4 = _mm_set1_ps(0); + float x2_sum = 0.f; + float s = 0; + // Compute loop chunk sizes until, and after, the wraparound of the circular + // buffer for x. + const int chunk1 = + std::min(h_size, static_cast(x_size - x_start_index)); + // Perform the loop in two chunks. + const int chunk2 = h_size - chunk1; + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_8 = limit >> 3; + for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) { + // Load the data into 128 bit vectors. 
+ const __m128 x_k = _mm_loadu_ps(x_p); + const __m128 h_k = _mm_loadu_ps(h_p); + const __m128 x_k_4 = _mm_loadu_ps(x_p + 4); + const __m128 h_k_4 = _mm_loadu_ps(h_p + 4); + const __m128 xx = _mm_mul_ps(x_k, x_k); + const __m128 xx_4 = _mm_mul_ps(x_k_4, x_k_4); + // Compute and accumulate x * x and h * x. + x2_sum_128 = _mm_add_ps(x2_sum_128, xx); + x2_sum_128_4 = _mm_add_ps(x2_sum_128_4, xx_4); + const __m128 hx = _mm_mul_ps(h_k, x_k); + const __m128 hx_4 = _mm_mul_ps(h_k_4, x_k_4); + s_128 = _mm_add_ps(s_128, hx); + s_128_4 = _mm_add_ps(s_128_4, hx_4); + } + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) { + const float x_k = *x_p; + x2_sum += x_k * x_k; + s += *h_p * x_k; + } + x_p = &x[0]; + } + // Combine the accumulated vector and scalar values. + x2_sum_128 = _mm_add_ps(x2_sum_128, x2_sum_128_4); + float* v = reinterpret_cast(&x2_sum_128); + x2_sum += v[0] + v[1] + v[2] + v[3]; + s_128 = _mm_add_ps(s_128, s_128_4); + v = reinterpret_cast(&s_128); + s += v[0] + v[1] + v[2] + v[3]; + // Compute the matched filter error. + float e = y[i] - s; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const __m128 alpha_128 = _mm_set1_ps(alpha); + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + x_p = &x[x_start_index]; + // Perform the loop in two chunks. + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_4 = limit >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + __m128 h_k = _mm_loadu_ps(h_p); + const __m128 x_k = _mm_loadu_ps(x_p); + + // Compute h = h + alpha * x. + const __m128 alpha_x = _mm_mul_ps(alpha_128, x_k); + h_k = _mm_add_ps(h_k, alpha_x); + // Store the result. + _mm_storeu_ps(h_p, h_k); + } + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { + *h_p += alpha * *x_p; + } + x_p = &x[0]; + } + *filters_updated = true; + } + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} +#endif + +void MatchedFilterCore(size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + bool compute_accumulated_error, + rtc::ArrayView accumulated_error) { + if (compute_accumulated_error) { + std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f); + } + + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + float x2_sum = 0.f; + float s = 0; + size_t x_index = x_start_index; + if (compute_accumulated_error) { + for (size_t k = 0; k < h.size(); ++k) { + x2_sum += x[x_index] * x[x_index]; + s += h[k] * x[x_index]; + x_index = x_index < (x.size() - 1) ? x_index + 1 : 0; + if ((k + 1 & 0b11) == 0) { + int idx = k >> 2; + accumulated_error[idx] += (y[i] - s) * (y[i] - s); + } + } + } else { + for (size_t k = 0; k < h.size(); ++k) { + x2_sum += x[x_index] * x[x_index]; + s += h[k] * x[x_index]; + x_index = x_index < (x.size() - 1) ? x_index + 1 : 0; + } + } + + // Compute the matched filter error. 
+ float e = y[i] - s; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + + // filter = filter + smoothing * (y - filter * x) * x / x * x. + size_t x_index = x_start_index; + for (size_t k = 0; k < h.size(); ++k) { + h[k] += alpha * x[x_index]; + x_index = x_index < (x.size() - 1) ? x_index + 1 : 0; + } + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x.size() - 1; + } +} + +size_t MaxSquarePeakIndex(rtc::ArrayView h) { + if (h.size() < 2) { + return 0; + } + float max_element1 = h[0] * h[0]; + float max_element2 = h[1] * h[1]; + size_t lag_estimate1 = 0; + size_t lag_estimate2 = 1; + const size_t last_index = h.size() - 1; + // Keeping track of even & odd max elements separately typically allows the + // compiler to produce more efficient code. + for (size_t k = 2; k < last_index; k += 2) { + float element1 = h[k] * h[k]; + float element2 = h[k + 1] * h[k + 1]; + if (element1 > max_element1) { + max_element1 = element1; + lag_estimate1 = k; + } + if (element2 > max_element2) { + max_element2 = element2; + lag_estimate2 = k + 1; + } + } + if (max_element2 > max_element1) { + max_element1 = max_element2; + lag_estimate1 = lag_estimate2; + } + // In case of odd h size, we have not yet checked the last element. + float last_element = h[last_index] * h[last_index]; + if (last_element > max_element1) { + return last_index; + } + return lag_estimate1; +} + +} // namespace aec3 + +MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper, + Aec3Optimization optimization, + size_t sub_block_size, + size_t window_size_sub_blocks, + int num_matched_filters, + size_t alignment_shift_sub_blocks, + float excitation_limit, + float smoothing_fast, + float smoothing_slow, + float matching_filter_threshold, + bool detect_pre_echo) + : data_dumper_(data_dumper), + optimization_(optimization), + sub_block_size_(sub_block_size), + filter_intra_lag_shift_(alignment_shift_sub_blocks * sub_block_size_), + filters_( + num_matched_filters, + std::vector(window_size_sub_blocks * sub_block_size_, 0.f)), + filters_offsets_(num_matched_filters, 0), + excitation_limit_(excitation_limit), + smoothing_fast_(smoothing_fast), + smoothing_slow_(smoothing_slow), + matching_filter_threshold_(matching_filter_threshold), + detect_pre_echo_(detect_pre_echo), + pre_echo_config_(FetchPreEchoConfiguration()) { + RTC_DCHECK(data_dumper); + RTC_DCHECK_LT(0, window_size_sub_blocks); + RTC_DCHECK((kBlockSize % sub_block_size) == 0); + RTC_DCHECK((sub_block_size % 4) == 0); + static_assert(kAccumulatedErrorSubSampleRate == 4); + if (detect_pre_echo_) { + accumulated_error_ = std::vector>( + num_matched_filters, + std::vector(window_size_sub_blocks * sub_block_size_ / + kAccumulatedErrorSubSampleRate, + 1.0f)); + + instantaneous_accumulated_error_ = + std::vector(window_size_sub_blocks * sub_block_size_ / + kAccumulatedErrorSubSampleRate, + 0.0f); + scratch_memory_ = + std::vector(window_size_sub_blocks * sub_block_size_); + } +} + +MatchedFilter::~MatchedFilter() = default; + +void MatchedFilter::Reset(bool full_reset) { + for (auto& f : filters_) { + std::fill(f.begin(), f.end(), 0.f); + } + + winner_lag_ = absl::nullopt; + reported_lag_estimate_ = absl::nullopt; + if (pre_echo_config_.mode != 3 || full_reset) { + for (auto& e : accumulated_error_) { + std::fill(e.begin(), 
e.end(), 1.0f); + } + number_pre_echo_updates_ = 0; + } +} + +void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer, + rtc::ArrayView capture, + bool use_slow_smoothing) { + RTC_DCHECK_EQ(sub_block_size_, capture.size()); + auto& y = capture; + + const float smoothing = + use_slow_smoothing ? smoothing_slow_ : smoothing_fast_; + + const float x2_sum_threshold = + filters_[0].size() * excitation_limit_ * excitation_limit_; + + // Compute anchor for the matched filter error. + float error_sum_anchor = 0.0f; + for (size_t k = 0; k < y.size(); ++k) { + error_sum_anchor += y[k] * y[k]; + } + + // Apply all matched filters. + float winner_error_sum = error_sum_anchor; + winner_lag_ = absl::nullopt; + reported_lag_estimate_ = absl::nullopt; + size_t alignment_shift = 0; + absl::optional previous_lag_estimate; + const int num_filters = static_cast(filters_.size()); + int winner_index = -1; + for (int n = 0; n < num_filters; ++n) { + float error_sum = 0.f; + bool filters_updated = false; + const bool compute_pre_echo = + detect_pre_echo_ && n == last_detected_best_lag_filter_; + + size_t x_start_index = + (render_buffer.read + alignment_shift + sub_block_size_ - 1) % + render_buffer.buffer.size(); + + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: + aec3::MatchedFilterCore_SSE2( + x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y, + filters_[n], &filters_updated, &error_sum, compute_pre_echo, + instantaneous_accumulated_error_, scratch_memory_); + break; + case Aec3Optimization::kAvx2: + aec3::MatchedFilterCore_AVX2( + x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y, + filters_[n], &filters_updated, &error_sum, compute_pre_echo, + instantaneous_accumulated_error_, scratch_memory_); + break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: + aec3::MatchedFilterCore_NEON( + x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y, + filters_[n], &filters_updated, &error_sum, compute_pre_echo, + instantaneous_accumulated_error_, scratch_memory_); + break; +#endif + default: + aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing, + render_buffer.buffer, y, filters_[n], + &filters_updated, &error_sum, compute_pre_echo, + instantaneous_accumulated_error_); + } + + // Estimate the lag in the matched filter as the distance to the portion in + // the filter that contributes the most to the matched filter output. This + // is detected as the peak of the matched filter. + const size_t lag_estimate = aec3::MaxSquarePeakIndex(filters_[n]); + const bool reliable = + lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) && + error_sum < matching_filter_threshold_ * error_sum_anchor; + + // Find the best estimate + const size_t lag = lag_estimate + alignment_shift; + if (filters_updated && reliable && error_sum < winner_error_sum) { + winner_error_sum = error_sum; + winner_index = n; + // In case that 2 matched filters return the same winner candidate + // (overlap region), the one with the smaller index is chosen in order + // to search for pre-echoes. 
+ if (previous_lag_estimate && previous_lag_estimate == lag) { + winner_lag_ = previous_lag_estimate; + winner_index = n - 1; + } else { + winner_lag_ = lag; + } + } + previous_lag_estimate = lag; + alignment_shift += filter_intra_lag_shift_; + } + + if (winner_index != -1) { + RTC_DCHECK(winner_lag_.has_value()); + reported_lag_estimate_ = + LagEstimate(winner_lag_.value(), /*pre_echo_lag=*/winner_lag_.value()); + if (detect_pre_echo_ && last_detected_best_lag_filter_ == winner_index) { + const float energy_threshold = + pre_echo_config_.mode == 3 ? 1.0f : 30.0f * 30.0f * y.size(); + + if (error_sum_anchor > energy_threshold) { + const float smooth_constant_increases = + pre_echo_config_.mode != 3 ? 0.01f : 0.015f; + + UpdateAccumulatedError( + instantaneous_accumulated_error_, accumulated_error_[winner_index], + 1.0f / error_sum_anchor, smooth_constant_increases); + number_pre_echo_updates_++; + } + if (pre_echo_config_.mode != 3 || number_pre_echo_updates_ >= 50) { + reported_lag_estimate_->pre_echo_lag = ComputePreEchoLag( + pre_echo_config_, accumulated_error_[winner_index], + winner_lag_.value(), + winner_index * filter_intra_lag_shift_ /*alignment_shift_winner*/); + } else { + reported_lag_estimate_->pre_echo_lag = winner_lag_.value(); + } + } + last_detected_best_lag_filter_ = winner_index; + } + if (ApmDataDumper::IsAvailable()) { + Dump(); + data_dumper_->DumpRaw("error_sum_anchor", error_sum_anchor / y.size()); + data_dumper_->DumpRaw("number_pre_echo_updates", number_pre_echo_updates_); + data_dumper_->DumpRaw("filter_smoothing", smoothing); + } +} + +void MatchedFilter::LogFilterProperties(int sample_rate_hz, + size_t shift, + size_t downsampling_factor) const { + size_t alignment_shift = 0; + constexpr int kFsBy1000 = 16; + for (size_t k = 0; k < filters_.size(); ++k) { + int start = static_cast(alignment_shift * downsampling_factor); + int end = static_cast((alignment_shift + filters_[k].size()) * + downsampling_factor); + RTC_LOG(LS_VERBOSE) << "Filter " << k << ": start: " + << (start - static_cast(shift)) / kFsBy1000 + << " ms, end: " + << (end - static_cast(shift)) / kFsBy1000 + << " ms."; + alignment_shift += filter_intra_lag_shift_; + } +} + +void MatchedFilter::Dump() { + for (size_t n = 0; n < filters_.size(); ++n) { + const size_t lag_estimate = aec3::MaxSquarePeakIndex(filters_[n]); + std::string dumper_filter = "aec3_correlator_" + std::to_string(n) + "_h"; + data_dumper_->DumpRaw(dumper_filter.c_str(), filters_[n]); + std::string dumper_lag = "aec3_correlator_lag_" + std::to_string(n); + data_dumper_->DumpRaw(dumper_lag.c_str(), + lag_estimate + n * filter_intra_lag_shift_); + if (detect_pre_echo_) { + std::string dumper_error = + "aec3_correlator_error_" + std::to_string(n) + "_h"; + data_dumper_->DumpRaw(dumper_error.c_str(), accumulated_error_[n]); + + size_t pre_echo_lag = + ComputePreEchoLag(pre_echo_config_, accumulated_error_[n], + lag_estimate + n * filter_intra_lag_shift_, + n * filter_intra_lag_shift_); + std::string dumper_pre_lag = + "aec3_correlator_pre_echo_lag_" + std::to_string(n); + data_dumper_->DumpRaw(dumper_pre_lag.c_str(), pre_echo_lag); + if (static_cast(n) == last_detected_best_lag_filter_) { + data_dumper_->DumpRaw("aec3_pre_echo_delay_winner_inst", pre_echo_lag); + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.h b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.h new file mode 100644 index 0000000000..bb54fba2b4 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter.h
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/gtest_prod_util.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+struct DownsampledRenderBuffer;
+
+namespace aec3 {
+
+#if defined(WEBRTC_HAS_NEON)
+
+// Filter core for the matched filter that is optimized for NEON.
+void MatchedFilterCore_NEON(size_t x_start_index,
+                            float x2_sum_threshold,
+                            float smoothing,
+                            rtc::ArrayView<const float> x,
+                            rtc::ArrayView<const float> y,
+                            rtc::ArrayView<float> h,
+                            bool* filters_updated,
+                            float* error_sum,
+                            bool compute_accumulated_error,
+                            rtc::ArrayView<float> accumulated_error,
+                            rtc::ArrayView<float> scratch_memory);
+
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+
+// Filter core for the matched filter that is optimized for SSE2.
+void MatchedFilterCore_SSE2(size_t x_start_index,
+                            float x2_sum_threshold,
+                            float smoothing,
+                            rtc::ArrayView<const float> x,
+                            rtc::ArrayView<const float> y,
+                            rtc::ArrayView<float> h,
+                            bool* filters_updated,
+                            float* error_sum,
+                            bool compute_accumulated_error,
+                            rtc::ArrayView<float> accumulated_error,
+                            rtc::ArrayView<float> scratch_memory);
+
+// Filter core for the matched filter that is optimized for AVX2.
+void MatchedFilterCore_AVX2(size_t x_start_index,
+                            float x2_sum_threshold,
+                            float smoothing,
+                            rtc::ArrayView<const float> x,
+                            rtc::ArrayView<const float> y,
+                            rtc::ArrayView<float> h,
+                            bool* filters_updated,
+                            float* error_sum,
+                            bool compute_accumulated_error,
+                            rtc::ArrayView<float> accumulated_error,
+                            rtc::ArrayView<float> scratch_memory);
+
+#endif
+
+// Filter core for the matched filter.
+void MatchedFilterCore(size_t x_start_index,
+                       float x2_sum_threshold,
+                       float smoothing,
+                       rtc::ArrayView<const float> x,
+                       rtc::ArrayView<const float> y,
+                       rtc::ArrayView<float> h,
+                       bool* filters_updated,
+                       float* error_sum,
+                       bool compute_accumulated_error,
+                       rtc::ArrayView<float> accumulated_error);
+
+// Finds the largest peak of the squared values in the array.
+size_t MaxSquarePeakIndex(rtc::ArrayView<const float> h);
+
+}  // namespace aec3
+
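The lag estimate for each shift falls out of the adapted filter's impulse response: the tap with the largest squared value marks the dominant echo path delay. A small usage sketch of `MaxSquarePeakIndex()` declared above (the filter values are made up for illustration):

```cpp
#include <cstddef>
#include <vector>

#include "modules/audio_processing/aec3/matched_filter.h"

size_t ExampleLagFromFilter() {
  // A toy 8-tap filter whose dominant squared peak is at index 5, i.e. the
  // delay estimate for this alignment shift would be 5 samples.
  std::vector<float> h = {0.01f, 0.02f, 0.0f, 0.1f, -0.2f, 0.9f, 0.3f, 0.05f};
  return webrtc::aec3::MaxSquarePeakIndex(h);  // Returns 5.
}
```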
+// Produces recursively updated cross-correlation estimates for several signal
+// shifts where the intra-shift spacing is uniform.
+class MatchedFilter {
+ public:
+  // Stores properties for the lag estimate corresponding to a particular
+  // signal shift.
+  struct LagEstimate {
+    LagEstimate() = default;
+    LagEstimate(size_t lag, size_t pre_echo_lag)
+        : lag(lag), pre_echo_lag(pre_echo_lag) {}
+    size_t lag = 0;
+    size_t pre_echo_lag = 0;
+  };
+
+  struct PreEchoConfiguration {
+    const float threshold;
+    const int mode;
+  };
+
+  MatchedFilter(ApmDataDumper* data_dumper,
+                Aec3Optimization optimization,
+                size_t sub_block_size,
+                size_t window_size_sub_blocks,
+                int num_matched_filters,
+                size_t alignment_shift_sub_blocks,
+                float excitation_limit,
+                float smoothing_fast,
+                float smoothing_slow,
+                float matching_filter_threshold,
+                bool detect_pre_echo);
+
+  MatchedFilter() = delete;
+  MatchedFilter(const MatchedFilter&) = delete;
+  MatchedFilter& operator=(const MatchedFilter&) = delete;
+
+  ~MatchedFilter();
+
+  // Updates the correlation with the values in the capture buffer.
+  void Update(const DownsampledRenderBuffer& render_buffer,
+              rtc::ArrayView<const float> capture,
+              bool use_slow_smoothing);
+
+  // Resets the matched filter.
+  void Reset(bool full_reset);
+
+  // Returns the current best lag estimate.
+  absl::optional<LagEstimate> GetBestLagEstimate() const {
+    return reported_lag_estimate_;
+  }
+
+  // Returns the maximum filter lag.
+  size_t GetMaxFilterLag() const {
+    return filters_.size() * filter_intra_lag_shift_ + filters_[0].size();
+  }
+
+  // Logs matched filter properties.
+  void LogFilterProperties(int sample_rate_hz,
+                           size_t shift,
+                           size_t downsampling_factor) const;
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(MatchedFilterFieldTrialTest,
+                           PreEchoConfigurationTest);
+  FRIEND_TEST_ALL_PREFIXES(MatchedFilterFieldTrialTest,
+                           WrongPreEchoConfigurationTest);
+
+  // Only for testing. Gets the pre echo detection configuration.
+  const PreEchoConfiguration& GetPreEchoConfiguration() const {
+    return pre_echo_config_;
+  }
+  void Dump();
+
+  ApmDataDumper* const data_dumper_;
+  const Aec3Optimization optimization_;
+  const size_t sub_block_size_;
+  const size_t filter_intra_lag_shift_;
+  std::vector<std::vector<float>> filters_;
+  std::vector<std::vector<float>> accumulated_error_;
+  std::vector<float> instantaneous_accumulated_error_;
+  std::vector<float> scratch_memory_;
+  absl::optional<LagEstimate> reported_lag_estimate_;
+  absl::optional<size_t> winner_lag_;
+  int last_detected_best_lag_filter_ = -1;
+  std::vector<size_t> filters_offsets_;
+  int number_pre_echo_updates_ = 0;
+  const float excitation_limit_;
+  const float smoothing_fast_;
+  const float smoothing_slow_;
+  const float matching_filter_threshold_;
+  const bool detect_pre_echo_;
+  const PreEchoConfiguration pre_echo_config_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc
new file mode 100644
index 0000000000..8c2ffcbd1e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_avx2.cc
@@ -0,0 +1,261 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include + +#include "modules/audio_processing/aec3/matched_filter.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace aec3 { + +// Let ha denote the horizontal of a, and hb the horizontal sum of b +// returns [ha, hb, ha, hb] +inline __m128 hsum_ab(__m256 a, __m256 b) { + __m256 s_256 = _mm256_hadd_ps(a, b); + const __m256i mask = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0); + s_256 = _mm256_permutevar8x32_ps(s_256, mask); + __m128 s = _mm_hadd_ps(_mm256_extractf128_ps(s_256, 0), + _mm256_extractf128_ps(s_256, 1)); + s = _mm_hadd_ps(s, s); + return s; +} + +void MatchedFilterCore_AccumulatedError_AVX2( + size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + rtc::ArrayView accumulated_error, + rtc::ArrayView scratch_memory) { + const int h_size = static_cast(h.size()); + const int x_size = static_cast(x.size()); + RTC_DCHECK_EQ(0, h_size % 16); + std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f); + + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + RTC_DCHECK_GT(x_size, x_start_index); + const int chunk1 = + std::min(h_size, static_cast(x_size - x_start_index)); + if (chunk1 != h_size) { + const int chunk2 = h_size - chunk1; + std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin()); + std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1); + } + const float* x_p = + chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; + const float* h_p = &h[0]; + float* a_p = &accumulated_error[0]; + __m256 s_inst_hadd_256; + __m256 s_inst_256; + __m256 s_inst_256_8; + __m256 x2_sum_256 = _mm256_set1_ps(0); + __m256 x2_sum_256_8 = _mm256_set1_ps(0); + __m128 e_128; + float x2_sum = 0.0f; + float s_acum = 0; + const int limit_by_16 = h_size >> 4; + for (int k = limit_by_16; k > 0; --k, h_p += 16, x_p += 16, a_p += 4) { + // Load the data into 256 bit vectors. + __m256 x_k = _mm256_loadu_ps(x_p); + __m256 h_k = _mm256_loadu_ps(h_p); + __m256 x_k_8 = _mm256_loadu_ps(x_p + 8); + __m256 h_k_8 = _mm256_loadu_ps(h_p + 8); + // Compute and accumulate x * x and h * x. + x2_sum_256 = _mm256_fmadd_ps(x_k, x_k, x2_sum_256); + x2_sum_256_8 = _mm256_fmadd_ps(x_k_8, x_k_8, x2_sum_256_8); + s_inst_256 = _mm256_mul_ps(h_k, x_k); + s_inst_256_8 = _mm256_mul_ps(h_k_8, x_k_8); + s_inst_hadd_256 = _mm256_hadd_ps(s_inst_256, s_inst_256_8); + s_inst_hadd_256 = _mm256_hadd_ps(s_inst_hadd_256, s_inst_hadd_256); + s_acum += s_inst_hadd_256[0]; + e_128[0] = s_acum - y[i]; + s_acum += s_inst_hadd_256[4]; + e_128[1] = s_acum - y[i]; + s_acum += s_inst_hadd_256[1]; + e_128[2] = s_acum - y[i]; + s_acum += s_inst_hadd_256[5]; + e_128[3] = s_acum - y[i]; + + __m128 accumulated_error = _mm_load_ps(a_p); + accumulated_error = _mm_fmadd_ps(e_128, e_128, accumulated_error); + _mm_storeu_ps(a_p, accumulated_error); + } + // Sum components together. + x2_sum_256 = _mm256_add_ps(x2_sum_256, x2_sum_256_8); + __m128 x2_sum_128 = _mm_add_ps(_mm256_extractf128_ps(x2_sum_256, 0), + _mm256_extractf128_ps(x2_sum_256, 1)); + // Combine the accumulated vector and scalar values. + float* v = reinterpret_cast(&x2_sum_128); + x2_sum += v[0] + v[1] + v[2] + v[3]; + + // Compute the matched filter error. 
+ float e = y[i] - s_acum; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const __m256 alpha_256 = _mm256_set1_ps(alpha); + + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + const float* x_p = + chunk1 != h_size ? scratch_memory.data() : &x[x_start_index]; + // Perform 256 bit vector operations. + const int limit_by_8 = h_size >> 3; + for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) { + // Load the data into 256 bit vectors. + __m256 h_k = _mm256_loadu_ps(h_p); + __m256 x_k = _mm256_loadu_ps(x_p); + // Compute h = h + alpha * x. + h_k = _mm256_fmadd_ps(x_k, alpha_256, h_k); + + // Store the result. + _mm256_storeu_ps(h_p, h_k); + } + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +void MatchedFilterCore_AVX2(size_t x_start_index, + float x2_sum_threshold, + float smoothing, + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView h, + bool* filters_updated, + float* error_sum, + bool compute_accumulated_error, + rtc::ArrayView accumulated_error, + rtc::ArrayView scratch_memory) { + if (compute_accumulated_error) { + return MatchedFilterCore_AccumulatedError_AVX2( + x_start_index, x2_sum_threshold, smoothing, x, y, h, filters_updated, + error_sum, accumulated_error, scratch_memory); + } + const int h_size = static_cast(h.size()); + const int x_size = static_cast(x.size()); + RTC_DCHECK_EQ(0, h_size % 8); + + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + + RTC_DCHECK_GT(x_size, x_start_index); + const float* x_p = &x[x_start_index]; + const float* h_p = &h[0]; + + // Initialize values for the accumulation. + __m256 s_256 = _mm256_set1_ps(0); + __m256 s_256_8 = _mm256_set1_ps(0); + __m256 x2_sum_256 = _mm256_set1_ps(0); + __m256 x2_sum_256_8 = _mm256_set1_ps(0); + float x2_sum = 0.f; + float s = 0; + + // Compute loop chunk sizes until, and after, the wraparound of the circular + // buffer for x. + const int chunk1 = + std::min(h_size, static_cast(x_size - x_start_index)); + + // Perform the loop in two chunks. + const int chunk2 = h_size - chunk1; + for (int limit : {chunk1, chunk2}) { + // Perform 256 bit vector operations. + const int limit_by_16 = limit >> 4; + for (int k = limit_by_16; k > 0; --k, h_p += 16, x_p += 16) { + // Load the data into 256 bit vectors. + __m256 x_k = _mm256_loadu_ps(x_p); + __m256 h_k = _mm256_loadu_ps(h_p); + __m256 x_k_8 = _mm256_loadu_ps(x_p + 8); + __m256 h_k_8 = _mm256_loadu_ps(h_p + 8); + // Compute and accumulate x * x and h * x. + x2_sum_256 = _mm256_fmadd_ps(x_k, x_k, x2_sum_256); + x2_sum_256_8 = _mm256_fmadd_ps(x_k_8, x_k_8, x2_sum_256_8); + s_256 = _mm256_fmadd_ps(h_k, x_k, s_256); + s_256_8 = _mm256_fmadd_ps(h_k_8, x_k_8, s_256_8); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_16 * 16; k > 0; --k, ++h_p, ++x_p) { + const float x_k = *x_p; + x2_sum += x_k * x_k; + s += *h_p * x_k; + } + + x_p = &x[0]; + } + + // Sum components together. + x2_sum_256 = _mm256_add_ps(x2_sum_256, x2_sum_256_8); + s_256 = _mm256_add_ps(s_256, s_256_8); + __m128 sum = hsum_ab(x2_sum_256, s_256); + x2_sum += sum[0]; + s += sum[1]; + + // Compute the matched filter error. 
+ float e = y[i] - s; + const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f; + (*error_sum) += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold && !saturation) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = smoothing * e / x2_sum; + const __m256 alpha_256 = _mm256_set1_ps(alpha); + + // filter = filter + smoothing * (y - filter * x) * x / x * x. + float* h_p = &h[0]; + x_p = &x[x_start_index]; + + // Perform the loop in two chunks. + for (int limit : {chunk1, chunk2}) { + // Perform 256 bit vector operations. + const int limit_by_8 = limit >> 3; + for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) { + // Load the data into 256 bit vectors. + __m256 h_k = _mm256_loadu_ps(h_p); + __m256 x_k = _mm256_loadu_ps(x_p); + // Compute h = h + alpha * x. + h_k = _mm256_fmadd_ps(x_k, alpha_256, h_k); + + // Store the result. + _mm256_storeu_ps(h_p, h_k); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) { + *h_p += alpha * *x_p; + } + + x_p = &x[0]; + } + + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_gn/moz.build new file mode 100644 index 0000000000..bae4fa2972 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("matched_filter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc new file mode 100644 index 0000000000..bea7868a91 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h" + +#include +#include + +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { +constexpr int kPreEchoHistogramDataNotUpdated = -1; + +int GetDownSamplingBlockSizeLog2(int down_sampling_factor) { + int down_sampling_factor_log2 = 0; + down_sampling_factor >>= 1; + while (down_sampling_factor > 0) { + down_sampling_factor_log2++; + down_sampling_factor >>= 1; + } + return static_cast(kBlockSizeLog2) > down_sampling_factor_log2 + ? 
static_cast(kBlockSizeLog2) - down_sampling_factor_log2 + : 0; +} +} // namespace + +MatchedFilterLagAggregator::MatchedFilterLagAggregator( + ApmDataDumper* data_dumper, + size_t max_filter_lag, + const EchoCanceller3Config::Delay& delay_config) + : data_dumper_(data_dumper), + thresholds_(delay_config.delay_selection_thresholds), + headroom_(static_cast(delay_config.delay_headroom_samples / + delay_config.down_sampling_factor)), + highest_peak_aggregator_(max_filter_lag) { + if (delay_config.detect_pre_echo) { + pre_echo_lag_aggregator_ = std::make_unique( + max_filter_lag, delay_config.down_sampling_factor); + } + RTC_DCHECK(data_dumper); + RTC_DCHECK_LE(thresholds_.initial, thresholds_.converged); +} + +MatchedFilterLagAggregator::~MatchedFilterLagAggregator() = default; + +void MatchedFilterLagAggregator::Reset(bool hard_reset) { + highest_peak_aggregator_.Reset(); + if (pre_echo_lag_aggregator_ != nullptr) { + pre_echo_lag_aggregator_->Reset(); + } + if (hard_reset) { + significant_candidate_found_ = false; + } +} + +absl::optional MatchedFilterLagAggregator::Aggregate( + const absl::optional& lag_estimate) { + if (lag_estimate && pre_echo_lag_aggregator_) { + pre_echo_lag_aggregator_->Dump(data_dumper_); + pre_echo_lag_aggregator_->Aggregate( + std::max(0, static_cast(lag_estimate->pre_echo_lag) - headroom_)); + } + + if (lag_estimate) { + highest_peak_aggregator_.Aggregate( + std::max(0, static_cast(lag_estimate->lag) - headroom_)); + rtc::ArrayView histogram = highest_peak_aggregator_.histogram(); + int candidate = highest_peak_aggregator_.candidate(); + significant_candidate_found_ = significant_candidate_found_ || + histogram[candidate] > thresholds_.converged; + if (histogram[candidate] > thresholds_.converged || + (histogram[candidate] > thresholds_.initial && + !significant_candidate_found_)) { + DelayEstimate::Quality quality = significant_candidate_found_ + ? DelayEstimate::Quality::kRefined + : DelayEstimate::Quality::kCoarse; + int reported_delay = pre_echo_lag_aggregator_ != nullptr + ? 
pre_echo_lag_aggregator_->pre_echo_candidate() + : candidate; + return DelayEstimate(quality, reported_delay); + } + } + + return absl::nullopt; +} + +MatchedFilterLagAggregator::HighestPeakAggregator::HighestPeakAggregator( + size_t max_filter_lag) + : histogram_(max_filter_lag + 1, 0) { + histogram_data_.fill(0); +} + +void MatchedFilterLagAggregator::HighestPeakAggregator::Reset() { + std::fill(histogram_.begin(), histogram_.end(), 0); + histogram_data_.fill(0); + histogram_data_index_ = 0; +} + +void MatchedFilterLagAggregator::HighestPeakAggregator::Aggregate(int lag) { + RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]); + RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]); + --histogram_[histogram_data_[histogram_data_index_]]; + histogram_data_[histogram_data_index_] = lag; + RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]); + RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]); + ++histogram_[histogram_data_[histogram_data_index_]]; + histogram_data_index_ = (histogram_data_index_ + 1) % histogram_data_.size(); + candidate_ = + std::distance(histogram_.begin(), + std::max_element(histogram_.begin(), histogram_.end())); +} + +MatchedFilterLagAggregator::PreEchoLagAggregator::PreEchoLagAggregator( + size_t max_filter_lag, + size_t down_sampling_factor) + : block_size_log2_(GetDownSamplingBlockSizeLog2(down_sampling_factor)), + histogram_( + ((max_filter_lag + 1) * down_sampling_factor) >> kBlockSizeLog2, + 0) { + Reset(); +} + +void MatchedFilterLagAggregator::PreEchoLagAggregator::Reset() { + std::fill(histogram_.begin(), histogram_.end(), 0); + histogram_data_.fill(kPreEchoHistogramDataNotUpdated); + histogram_data_index_ = 0; + pre_echo_candidate_ = 0; +} + +void MatchedFilterLagAggregator::PreEchoLagAggregator::Aggregate( + int pre_echo_lag) { + int pre_echo_block_size = pre_echo_lag >> block_size_log2_; + RTC_DCHECK(pre_echo_block_size >= 0 && + pre_echo_block_size < static_cast(histogram_.size())); + pre_echo_block_size = + rtc::SafeClamp(pre_echo_block_size, 0, histogram_.size() - 1); + // Remove the oldest point from the `histogram_`, it ignores the initial + // points where no updates have been done to the `histogram_data_` array. + if (histogram_data_[histogram_data_index_] != + kPreEchoHistogramDataNotUpdated) { + --histogram_[histogram_data_[histogram_data_index_]]; + } + histogram_data_[histogram_data_index_] = pre_echo_block_size; + ++histogram_[histogram_data_[histogram_data_index_]]; + histogram_data_index_ = (histogram_data_index_ + 1) % histogram_data_.size(); + int pre_echo_candidate_block_size = + std::distance(histogram_.begin(), + std::max_element(histogram_.begin(), histogram_.end())); + pre_echo_candidate_ = (pre_echo_candidate_block_size << block_size_log2_); +} + +void MatchedFilterLagAggregator::PreEchoLagAggregator::Dump( + ApmDataDumper* const data_dumper) { + data_dumper->DumpRaw("aec3_pre_echo_delay_candidate", pre_echo_candidate_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h new file mode 100644 index 0000000000..c0598bf226 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/matched_filter.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+// Aggregates lag estimates produced by the MatchedFilter class into a single
+// reliable combined lag estimate.
+class MatchedFilterLagAggregator {
+ public:
+  MatchedFilterLagAggregator(ApmDataDumper* data_dumper,
+                             size_t max_filter_lag,
+                             const EchoCanceller3Config::Delay& delay_config);
+
+  MatchedFilterLagAggregator() = delete;
+  MatchedFilterLagAggregator(const MatchedFilterLagAggregator&) = delete;
+  MatchedFilterLagAggregator& operator=(const MatchedFilterLagAggregator&) =
+      delete;
+
+  ~MatchedFilterLagAggregator();
+
+  // Resets the aggregator.
+  void Reset(bool hard_reset);
+
+  // Aggregates the provided lag estimate.
+  absl::optional<DelayEstimate> Aggregate(
+      const absl::optional<MatchedFilter::LagEstimate>& lag_estimate);
+
+  // Returns whether a reliable delay estimate has been found.
+  bool ReliableDelayFound() const { return significant_candidate_found_; }
+
+  // Returns the delay candidate that is computed by looking at the highest
+  // peak on the matched filters.
+  int GetDelayAtHighestPeak() const {
+    return highest_peak_aggregator_.candidate();
+  }
+
+ private:
+  class PreEchoLagAggregator {
+   public:
+    PreEchoLagAggregator(size_t max_filter_lag, size_t down_sampling_factor);
+    void Reset();
+    void Aggregate(int pre_echo_lag);
+    int pre_echo_candidate() const { return pre_echo_candidate_; }
+    void Dump(ApmDataDumper* const data_dumper);
+
+   private:
+    const int block_size_log2_;
+    std::array<int, 250> histogram_data_;
+    std::vector<int> histogram_;
+    int histogram_data_index_ = 0;
+    int pre_echo_candidate_ = 0;
+  };
+
+  class HighestPeakAggregator {
+   public:
+    explicit HighestPeakAggregator(size_t max_filter_lag);
+    void Reset();
+    void Aggregate(int lag);
+    int candidate() const { return candidate_; }
+    rtc::ArrayView<const int> histogram() const { return histogram_; }
+
+   private:
+    std::vector<int> histogram_;
+    std::array<int, 250> histogram_data_;
+    int histogram_data_index_ = 0;
+    int candidate_ = -1;
+  };
+
+  ApmDataDumper* const data_dumper_;
+  bool significant_candidate_found_ = false;
+  const EchoCanceller3Config::Delay::DelaySelectionThresholds thresholds_;
+  const int headroom_;
+  HighestPeakAggregator highest_peak_aggregator_;
+  std::unique_ptr<PreEchoLagAggregator> pre_echo_lag_aggregator_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc
new file mode 100644
index 0000000000..6804102584
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc
@@ -0,0 +1,113 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h" + +#include +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +constexpr size_t kNumLagsBeforeDetection = 26; + +} // namespace + +// Verifies that varying lag estimates causes lag estimates to not be deemed +// reliable. +TEST(MatchedFilterLagAggregator, + LagEstimateInvarianceRequiredForAggregatedLag) { + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + MatchedFilterLagAggregator aggregator(&data_dumper, /*max_filter_lag=*/100, + config.delay); + + absl::optional aggregated_lag; + for (size_t k = 0; k < kNumLagsBeforeDetection; ++k) { + aggregated_lag = aggregator.Aggregate( + MatchedFilter::LagEstimate(/*lag=*/10, /*pre_echo_lag=*/10)); + } + EXPECT_TRUE(aggregated_lag); + + for (size_t k = 0; k < kNumLagsBeforeDetection * 100; ++k) { + aggregated_lag = aggregator.Aggregate( + MatchedFilter::LagEstimate(/*lag=*/k % 100, /*pre_echo_lag=*/k % 100)); + } + EXPECT_FALSE(aggregated_lag); + + for (size_t k = 0; k < kNumLagsBeforeDetection * 100; ++k) { + aggregated_lag = aggregator.Aggregate( + MatchedFilter::LagEstimate(/*lag=*/k % 100, /*pre_echo_lag=*/k % 100)); + EXPECT_FALSE(aggregated_lag); + } +} + +// Verifies that lag estimate updates are required to produce an updated lag +// aggregate. +TEST(MatchedFilterLagAggregator, + DISABLED_LagEstimateUpdatesRequiredForAggregatedLag) { + constexpr size_t kLag = 5; + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + MatchedFilterLagAggregator aggregator(&data_dumper, /*max_filter_lag=*/kLag, + config.delay); + for (size_t k = 0; k < kNumLagsBeforeDetection * 10; ++k) { + absl::optional aggregated_lag = aggregator.Aggregate( + MatchedFilter::LagEstimate(/*lag=*/kLag, /*pre_echo_lag=*/kLag)); + EXPECT_FALSE(aggregated_lag); + EXPECT_EQ(kLag, aggregated_lag->delay); + } +} + +// Verifies that an aggregated lag is persistent if the lag estimates do not +// change and that an aggregated lag is not produced without gaining lag +// estimate confidence. 
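
The aggregation behavior exercised by these tests comes from the histogram logic shown earlier: each aggregator keeps a fixed-length ring buffer of recent lag observations plus a histogram over lag values, and reports the histogram mode as its candidate. A minimal sketch of that scheme, with a hypothetical class name and window length (the real implementation is MatchedFilterLagAggregator above):

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <iterator>
    #include <vector>

    // Sliding-window mode estimator: the newest lag evicts the oldest one,
    // the histogram is updated incrementally, and the candidate is the
    // histogram peak.
    class SlidingModeEstimator {
     public:
      explicit SlidingModeEstimator(size_t max_lag)
          : histogram_(max_lag + 1, 0) {
        window_.fill(-1);  // -1 marks slots that have not been filled yet.
      }
      int Update(int lag) {
        int& slot = window_[index_];
        if (slot != -1) {
          --histogram_[slot];  // Remove the oldest observation, if any.
        }
        slot = lag;
        ++histogram_[lag];
        index_ = (index_ + 1) % window_.size();
        return static_cast<int>(std::distance(
            histogram_.begin(),
            std::max_element(histogram_.begin(), histogram_.end())));
      }

     private:
      static constexpr size_t kWindowLength = 250;  // Hypothetical size.
      std::array<int, kWindowLength> window_;
      std::vector<int> histogram_;
      size_t index_ = 0;
    };
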
+TEST(MatchedFilterLagAggregator, DISABLED_PersistentAggregatedLag) { + constexpr size_t kLag1 = 5; + constexpr size_t kLag2 = 10; + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + std::vector lag_estimates(1); + MatchedFilterLagAggregator aggregator(&data_dumper, std::max(kLag1, kLag2), + config.delay); + absl::optional aggregated_lag; + for (size_t k = 0; k < kNumLagsBeforeDetection; ++k) { + aggregated_lag = aggregator.Aggregate( + MatchedFilter::LagEstimate(/*lag=*/kLag1, /*pre_echo_lag=*/kLag1)); + } + EXPECT_TRUE(aggregated_lag); + EXPECT_EQ(kLag1, aggregated_lag->delay); + + for (size_t k = 0; k < kNumLagsBeforeDetection * 40; ++k) { + aggregated_lag = aggregator.Aggregate( + MatchedFilter::LagEstimate(/*lag=*/kLag2, /*pre_echo_lag=*/kLag2)); + EXPECT_TRUE(aggregated_lag); + EXPECT_EQ(kLag1, aggregated_lag->delay); + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for non-null data dumper. +TEST(MatchedFilterLagAggregatorDeathTest, NullDataDumper) { + EchoCanceller3Config config; + EXPECT_DEATH(MatchedFilterLagAggregator(nullptr, 10, config.delay), ""); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_unittest.cc new file mode 100644 index 0000000000..0a04c7809c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/matched_filter_unittest.cc @@ -0,0 +1,612 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/matched_filter.h" + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/decimator.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/field_trial.h" +#include "test/gtest.h" + +namespace webrtc { +namespace aec3 { +namespace { + +std::string ProduceDebugText(size_t delay, size_t down_sampling_factor) { + rtc::StringBuilder ss; + ss << "Delay: " << delay; + ss << ", Down sampling factor: " << down_sampling_factor; + return ss.Release(); +} + +constexpr size_t kNumMatchedFilters = 10; +constexpr size_t kDownSamplingFactors[] = {2, 4, 8}; +constexpr size_t kWindowSizeSubBlocks = 32; +constexpr size_t kAlignmentShiftSubBlocks = kWindowSizeSubBlocks * 3 / 4; + +} // namespace + +class MatchedFilterTest : public ::testing::TestWithParam {}; + +#if defined(WEBRTC_HAS_NEON) +// Verifies that the optimized methods for NEON are similar to their reference +// counterparts. 
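
A note on the SIMD comparison tests that follow: the accumulated-error assertions use a relative tolerance when the reference value is positive and fall back to an absolute difference otherwise. The check they implement, written out as a hypothetical helper:

    #include <cmath>

    // Relative tolerance when the reference is positive; absolute otherwise.
    bool NearRelative(float reference, float value, float tolerance) {
      const float difference = std::fabs(reference - value);
      const float relative =
          reference > 0 ? difference / reference : difference;
      return relative <= tolerance;
    }
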
+TEST_P(MatchedFilterTest, TestNeonOptimizations) { + Random random_generator(42U); + constexpr float kSmoothing = 0.7f; + const bool kComputeAccumulatederror = GetParam(); + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + std::vector x(2000); + RandomizeSampleVector(&random_generator, x); + std::vector y(sub_block_size); + std::vector h_NEON(512); + std::vector h(512); + std::vector accumulated_error(512); + std::vector accumulated_error_NEON(512); + std::vector scratch_memory(512); + + int x_index = 0; + for (int k = 0; k < 1000; ++k) { + RandomizeSampleVector(&random_generator, y); + + bool filters_updated = false; + float error_sum = 0.f; + bool filters_updated_NEON = false; + float error_sum_NEON = 0.f; + + MatchedFilterCore_NEON(x_index, h.size() * 150.f * 150.f, kSmoothing, x, + y, h_NEON, &filters_updated_NEON, &error_sum_NEON, + kComputeAccumulatederror, accumulated_error_NEON, + scratch_memory); + + MatchedFilterCore(x_index, h.size() * 150.f * 150.f, kSmoothing, x, y, h, + &filters_updated, &error_sum, kComputeAccumulatederror, + accumulated_error); + + EXPECT_EQ(filters_updated, filters_updated_NEON); + EXPECT_NEAR(error_sum, error_sum_NEON, error_sum / 100000.f); + + for (size_t j = 0; j < h.size(); ++j) { + EXPECT_NEAR(h[j], h_NEON[j], 0.00001f); + } + + if (kComputeAccumulatederror) { + for (size_t j = 0; j < accumulated_error.size(); ++j) { + float difference = + std::abs(accumulated_error[j] - accumulated_error_NEON[j]); + float relative_difference = accumulated_error[j] > 0 + ? difference / accumulated_error[j] + : difference; + EXPECT_NEAR(relative_difference, 0.0f, 0.02f); + } + } + + x_index = (x_index + sub_block_size) % x.size(); + } + } +} +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Verifies that the optimized methods for SSE2 are bitexact to their reference +// counterparts. +TEST_P(MatchedFilterTest, TestSse2Optimizations) { + const bool kComputeAccumulatederror = GetParam(); + bool use_sse2 = (GetCPUInfo(kSSE2) != 0); + if (use_sse2) { + Random random_generator(42U); + constexpr float kSmoothing = 0.7f; + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + std::vector x(2000); + RandomizeSampleVector(&random_generator, x); + std::vector y(sub_block_size); + std::vector h_SSE2(512); + std::vector h(512); + std::vector accumulated_error(512 / 4); + std::vector accumulated_error_SSE2(512 / 4); + std::vector scratch_memory(512); + int x_index = 0; + for (int k = 0; k < 1000; ++k) { + RandomizeSampleVector(&random_generator, y); + + bool filters_updated = false; + float error_sum = 0.f; + bool filters_updated_SSE2 = false; + float error_sum_SSE2 = 0.f; + + MatchedFilterCore_SSE2(x_index, h.size() * 150.f * 150.f, kSmoothing, x, + y, h_SSE2, &filters_updated_SSE2, + &error_sum_SSE2, kComputeAccumulatederror, + accumulated_error_SSE2, scratch_memory); + + MatchedFilterCore(x_index, h.size() * 150.f * 150.f, kSmoothing, x, y, + h, &filters_updated, &error_sum, + kComputeAccumulatederror, accumulated_error); + + EXPECT_EQ(filters_updated, filters_updated_SSE2); + EXPECT_NEAR(error_sum, error_sum_SSE2, error_sum / 100000.f); + + for (size_t j = 0; j < h.size(); ++j) { + EXPECT_NEAR(h[j], h_SSE2[j], 0.00001f); + } + + for (size_t j = 0; j < accumulated_error.size(); ++j) { + float difference = + std::abs(accumulated_error[j] - accumulated_error_SSE2[j]); + float relative_difference = accumulated_error[j] > 0 + ? 
difference / accumulated_error[j] + : difference; + EXPECT_NEAR(relative_difference, 0.0f, 0.00001f); + } + + x_index = (x_index + sub_block_size) % x.size(); + } + } + } +} + +TEST_P(MatchedFilterTest, TestAvx2Optimizations) { + bool use_avx2 = (GetCPUInfo(kAVX2) != 0); + const bool kComputeAccumulatederror = GetParam(); + if (use_avx2) { + Random random_generator(42U); + constexpr float kSmoothing = 0.7f; + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + std::vector x(2000); + RandomizeSampleVector(&random_generator, x); + std::vector y(sub_block_size); + std::vector h_AVX2(512); + std::vector h(512); + std::vector accumulated_error(512 / 4); + std::vector accumulated_error_AVX2(512 / 4); + std::vector scratch_memory(512); + int x_index = 0; + for (int k = 0; k < 1000; ++k) { + RandomizeSampleVector(&random_generator, y); + bool filters_updated = false; + float error_sum = 0.f; + bool filters_updated_AVX2 = false; + float error_sum_AVX2 = 0.f; + MatchedFilterCore_AVX2(x_index, h.size() * 150.f * 150.f, kSmoothing, x, + y, h_AVX2, &filters_updated_AVX2, + &error_sum_AVX2, kComputeAccumulatederror, + accumulated_error_AVX2, scratch_memory); + MatchedFilterCore(x_index, h.size() * 150.f * 150.f, kSmoothing, x, y, + h, &filters_updated, &error_sum, + kComputeAccumulatederror, accumulated_error); + EXPECT_EQ(filters_updated, filters_updated_AVX2); + EXPECT_NEAR(error_sum, error_sum_AVX2, error_sum / 100000.f); + for (size_t j = 0; j < h.size(); ++j) { + EXPECT_NEAR(h[j], h_AVX2[j], 0.00001f); + } + for (size_t j = 0; j < accumulated_error.size(); j += 4) { + float difference = + std::abs(accumulated_error[j] - accumulated_error_AVX2[j]); + float relative_difference = accumulated_error[j] > 0 + ? difference / accumulated_error[j] + : difference; + EXPECT_NEAR(relative_difference, 0.0f, 0.00001f); + } + x_index = (x_index + sub_block_size) % x.size(); + } + } + } +} + +#endif + +// Verifies that the (optimized) function MaxSquarePeakIndex() produces output +// equal to the corresponding std-functions. +TEST(MatchedFilter, MaxSquarePeakIndex) { + Random random_generator(42U); + constexpr int kMaxLength = 128; + constexpr int kNumIterationsPerLength = 256; + for (int length = 1; length < kMaxLength; ++length) { + std::vector y(length); + for (int i = 0; i < kNumIterationsPerLength; ++i) { + RandomizeSampleVector(&random_generator, y); + + size_t lag_from_function = MaxSquarePeakIndex(y); + size_t lag_from_std = std::distance( + y.begin(), + std::max_element(y.begin(), y.end(), [](float a, float b) -> bool { + return a * a < b * b; + })); + EXPECT_EQ(lag_from_function, lag_from_std); + } + } +} + +// Verifies that the matched filter produces proper lag estimates for +// artificially delayed signals. 
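
In the test below, DelayBuffer supplies the artificial delay between render and capture. A stand-in sketch of the underlying idea, assuming a plain circular buffer (this is not the WebRTC DelayBuffer class): each input sample is emitted again `delay` samples later, with zeros until enough input has been seen.

    #include <cstddef>
    #include <vector>

    class SimpleDelayLine {
     public:
      explicit SimpleDelayLine(size_t delay) : buffer_(delay, 0.f) {}
      float Process(float input) {
        if (buffer_.empty()) {
          return input;  // Zero delay: pass through.
        }
        const float output = buffer_[position_];  // Written `delay` ago.
        buffer_[position_] = input;
        position_ = (position_ + 1) % buffer_.size();
        return output;
      }

     private:
      std::vector<float> buffer_;
      size_t position_ = 0;
    };
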
+TEST_P(MatchedFilterTest, LagEstimation) { + const bool kDetectPreEcho = GetParam(); + Random random_generator(42U); + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + Block render(kNumBands, kNumChannels); + std::vector> capture( + 1, std::vector(kBlockSize, 0.f)); + ApmDataDumper data_dumper(0); + for (size_t delay_samples : {5, 64, 150, 200, 800, 1000}) { + SCOPED_TRACE(ProduceDebugText(delay_samples, down_sampling_factor)); + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = kNumMatchedFilters; + Decimator capture_decimator(down_sampling_factor); + DelayBuffer signal_delay_buffer(down_sampling_factor * + delay_samples); + MatchedFilter filter( + &data_dumper, DetectOptimization(), sub_block_size, + kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, + 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, kDetectPreEcho); + + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels)); + + // Analyze the correlation between render and capture. + for (size_t k = 0; k < (600 + delay_samples / sub_block_size); ++k) { + for (size_t band = 0; band < kNumBands; ++band) { + for (size_t channel = 0; channel < kNumChannels; ++channel) { + RandomizeSampleVector(&random_generator, + render.View(band, channel)); + } + } + signal_delay_buffer.Delay(render.View(/*band=*/0, /*channel=*/0), + capture[0]); + render_delay_buffer->Insert(render); + + if (k == 0) { + render_delay_buffer->Reset(); + } + + render_delay_buffer->PrepareCaptureProcessing(); + std::array downsampled_capture_data; + rtc::ArrayView downsampled_capture( + downsampled_capture_data.data(), sub_block_size); + capture_decimator.Decimate(capture[0], downsampled_capture); + filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), + downsampled_capture, /*use_slow_smoothing=*/false); + } + + // Obtain the lag estimates. + auto lag_estimate = filter.GetBestLagEstimate(); + EXPECT_TRUE(lag_estimate.has_value()); + + // Verify that the expected most accurate lag estimate is correct. + if (lag_estimate.has_value()) { + EXPECT_EQ(delay_samples, lag_estimate->lag); + EXPECT_EQ(delay_samples, lag_estimate->pre_echo_lag); + } + } + } +} + +// Test the pre echo estimation. 
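
The pre-echo test below specifies its delays as times at a 16 kHz reference rate and converts them to samples in the downsampled domain. The arithmetic for one example factor (constant names here are illustrative; the values follow directly from the expressions in the test):

    // With down_sampling_factor = 4:
    constexpr int kRateHz = 16000;
    constexpr int kFactor = 4;
    constexpr int kPreEchoDelaySamples = 20e-3 * kRateHz / kFactor;  // = 80
    constexpr int kEchoDelaySamples = 50e-3 * kRateHz / kFactor;     // = 200
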
+TEST_P(MatchedFilterTest, PreEchoEstimation) {
+  const bool kDetectPreEcho = GetParam();
+  Random random_generator(42U);
+  constexpr size_t kNumChannels = 1;
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+
+  for (auto down_sampling_factor : kDownSamplingFactors) {
+    const size_t sub_block_size = kBlockSize / down_sampling_factor;
+
+    Block render(kNumBands, kNumChannels);
+    std::vector<std::vector<float>> capture(
+        1, std::vector<float>(kBlockSize, 0.f));
+    std::vector<float> capture_with_pre_echo(kBlockSize, 0.f);
+    ApmDataDumper data_dumper(0);
+    // data_dumper.SetActivated(true);
+    size_t pre_echo_delay_samples = 20e-3 * 16000 / down_sampling_factor;
+    size_t echo_delay_samples = 50e-3 * 16000 / down_sampling_factor;
+    EchoCanceller3Config config;
+    config.delay.down_sampling_factor = down_sampling_factor;
+    config.delay.num_filters = kNumMatchedFilters;
+    Decimator capture_decimator(down_sampling_factor);
+    DelayBuffer<float> signal_echo_delay_buffer(down_sampling_factor *
+                                                echo_delay_samples);
+    DelayBuffer<float> signal_pre_echo_delay_buffer(down_sampling_factor *
+                                                    pre_echo_delay_samples);
+    MatchedFilter filter(
+        &data_dumper, DetectOptimization(), sub_block_size,
+        kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, 150,
+        config.delay.delay_estimate_smoothing,
+        config.delay.delay_estimate_smoothing_delay_found,
+        config.delay.delay_candidate_detection_threshold, kDetectPreEcho);
+    std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+        RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels));
+    // Analyze the correlation between render and capture.
+    for (size_t k = 0; k < (600 + echo_delay_samples / sub_block_size); ++k) {
+      for (size_t band = 0; band < kNumBands; ++band) {
+        for (size_t channel = 0; channel < kNumChannels; ++channel) {
+          RandomizeSampleVector(&random_generator, render.View(band, channel));
+        }
+      }
+      signal_echo_delay_buffer.Delay(render.View(0, 0), capture[0]);
+      signal_pre_echo_delay_buffer.Delay(render.View(0, 0),
+                                         capture_with_pre_echo);
+      for (size_t k = 0; k < capture[0].size(); ++k) {
+        constexpr float gain_pre_echo = 0.8f;
+        capture[0][k] += gain_pre_echo * capture_with_pre_echo[k];
+      }
+      render_delay_buffer->Insert(render);
+      if (k == 0) {
+        render_delay_buffer->Reset();
+      }
+      render_delay_buffer->PrepareCaptureProcessing();
+      std::array<float, kBlockSize> downsampled_capture_data;
+      rtc::ArrayView<float> downsampled_capture(
+          downsampled_capture_data.data(), sub_block_size);
+      capture_decimator.Decimate(capture[0], downsampled_capture);
+      filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(),
+                    downsampled_capture, /*use_slow_smoothing=*/false);
+    }
+    // Obtain the lag estimates.
+    auto lag_estimate = filter.GetBestLagEstimate();
+    EXPECT_TRUE(lag_estimate.has_value());
+    // Verify that the expected most accurate lag estimate is correct.
+    if (lag_estimate.has_value()) {
+      EXPECT_EQ(echo_delay_samples, lag_estimate->lag);
+      if (kDetectPreEcho) {
+        // The pre echo delay is estimated in a subsampled domain and a larger
+        // error is allowed.
+        EXPECT_NEAR(pre_echo_delay_samples, lag_estimate->pre_echo_lag, 4);
+      } else {
+        // The pre echo delay falls back to the highest matched filter peak
+        // when its detection is disabled.
+        EXPECT_EQ(echo_delay_samples, lag_estimate->pre_echo_lag);
+      }
+    }
+  }
+}
+
+// Verifies that the matched filter does not produce reliable and accurate
+// estimates for uncorrelated render and capture signals.
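
For the uncorrelated-signals test below, the intuition is that independent render and capture noise produces no consistent correlation peak at any lag, so no candidate can accumulate enough support to be deemed reliable. A plain cross-correlation sketch of the quantity whose peak the matched filter effectively estimates (an illustration only, not the matched-filter update itself):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    float CrossCorrelationAtLag(const std::vector<float>& render,
                                const std::vector<float>& capture,
                                size_t lag) {
      float sum = 0.f;
      const size_t n_end = std::min(render.size(), capture.size());
      for (size_t n = lag; n < n_end; ++n) {
        sum += render[n - lag] * capture[n];  // Render delayed by `lag`.
      }
      return sum;
    }
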
+TEST_P(MatchedFilterTest, LagNotReliableForUncorrelatedRenderAndCapture) { + const bool kDetectPreEcho = GetParam(); + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + Random random_generator(42U); + for (auto down_sampling_factor : kDownSamplingFactors) { + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = kNumMatchedFilters; + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + Block render(kNumBands, kNumChannels); + std::array capture_data; + rtc::ArrayView capture(capture_data.data(), sub_block_size); + std::fill(capture.begin(), capture.end(), 0.f); + ApmDataDumper data_dumper(0); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels)); + MatchedFilter filter( + &data_dumper, DetectOptimization(), sub_block_size, + kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, 150, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, kDetectPreEcho); + + // Analyze the correlation between render and capture. + for (size_t k = 0; k < 100; ++k) { + RandomizeSampleVector(&random_generator, + render.View(/*band=*/0, /*channel=*/0)); + RandomizeSampleVector(&random_generator, capture); + render_delay_buffer->Insert(render); + filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), capture, + false); + } + + // Obtain the best lag estimate and Verify that no lag estimates are + // reliable. + auto best_lag_estimates = filter.GetBestLagEstimate(); + EXPECT_FALSE(best_lag_estimates.has_value()); + } +} + +// Verifies that the matched filter does not produce updated lag estimates for +// render signals of low level. +TEST_P(MatchedFilterTest, LagNotUpdatedForLowLevelRender) { + const bool kDetectPreEcho = GetParam(); + Random random_generator(42U); + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + Block render(kNumBands, kNumChannels); + std::vector> capture( + 1, std::vector(kBlockSize, 0.f)); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + MatchedFilter filter( + &data_dumper, DetectOptimization(), sub_block_size, + kWindowSizeSubBlocks, kNumMatchedFilters, kAlignmentShiftSubBlocks, 150, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, kDetectPreEcho); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), kSampleRateHz, + kNumChannels)); + Decimator capture_decimator(down_sampling_factor); + + // Analyze the correlation between render and capture. 
+    for (size_t k = 0; k < 100; ++k) {
+      RandomizeSampleVector(&random_generator, render.View(0, 0));
+      for (auto& render_k : render.View(0, 0)) {
+        render_k *= 149.f / 32767.f;
+      }
+      std::copy(render.begin(0, 0), render.end(0, 0), capture[0].begin());
+      std::array<float, kBlockSize> downsampled_capture_data;
+      rtc::ArrayView<float> downsampled_capture(
+          downsampled_capture_data.data(), sub_block_size);
+      capture_decimator.Decimate(capture[0], downsampled_capture);
+      filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(),
+                    downsampled_capture, false);
+    }
+
+    // Verify that no lag estimate has been produced.
+    auto lag_estimate = filter.GetBestLagEstimate();
+    EXPECT_FALSE(lag_estimate.has_value());
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(_, MatchedFilterTest, testing::Values(true, false));
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+class MatchedFilterDeathTest : public ::testing::TestWithParam<bool> {};
+
+// Verifies the check for non-zero window size.
+TEST_P(MatchedFilterDeathTest, ZeroWindowSize) {
+  const bool kDetectPreEcho = GetParam();
+  ApmDataDumper data_dumper(0);
+  EchoCanceller3Config config;
+  EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 16, 0, 1, 1,
+                             150, config.delay.delay_estimate_smoothing,
+                             config.delay.delay_estimate_smoothing_delay_found,
+                             config.delay.delay_candidate_detection_threshold,
+                             kDetectPreEcho),
+               "");
+}
+
+// Verifies the check for non-null data dumper.
+TEST_P(MatchedFilterDeathTest, NullDataDumper) {
+  const bool kDetectPreEcho = GetParam();
+  EchoCanceller3Config config;
+  EXPECT_DEATH(MatchedFilter(nullptr, DetectOptimization(), 16, 1, 1, 1, 150,
+                             config.delay.delay_estimate_smoothing,
+                             config.delay.delay_estimate_smoothing_delay_found,
+                             config.delay.delay_candidate_detection_threshold,
+                             kDetectPreEcho),
+               "");
+}
+
+// Verifies the check that the sub block size is a multiple of 4.
+// TODO(peah): Activate the unittest once the required code has been landed.
+TEST_P(MatchedFilterDeathTest, DISABLED_BlockSizeMultipleOf4) {
+  const bool kDetectPreEcho = GetParam();
+  ApmDataDumper data_dumper(0);
+  EchoCanceller3Config config;
+  EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 15, 1, 1, 1,
+                             150, config.delay.delay_estimate_smoothing,
+                             config.delay.delay_estimate_smoothing_delay_found,
+                             config.delay.delay_candidate_detection_threshold,
+                             kDetectPreEcho),
+               "");
+}
+
+// Verifies the check that there is an integer number of sub blocks that add
+// up to a block size.
+// TODO(peah): Activate the unittest once the required code has been landed.
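
The two disabled death tests (above and below) encode the intended constraints on the sub-block size: it must be a multiple of 4 and there must be a whole number of sub blocks per block. Under that reading, the invariant amounts to the following predicate (kBlockSize as defined in aec3_common.h):

    #include <cstddef>

    bool ValidSubBlockSize(size_t sub_block_size) {
      return sub_block_size > 0 && sub_block_size % 4 == 0 &&
             kBlockSize % sub_block_size == 0;
    }
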
+TEST_P(MatchedFilterDeathTest, DISABLED_SubBlockSizeAddsUpToBlockSize) { + const bool kDetectPreEcho = GetParam(); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + EXPECT_DEATH(MatchedFilter(&data_dumper, DetectOptimization(), 12, 1, 1, 1, + 150, config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, + kDetectPreEcho), + ""); +} + +INSTANTIATE_TEST_SUITE_P(_, + MatchedFilterDeathTest, + testing::Values(true, false)); + +#endif + +} // namespace aec3 + +TEST(MatchedFilterFieldTrialTest, PreEchoConfigurationTest) { + float threshold_in = 0.1f; + int mode_in = 2; + rtc::StringBuilder field_trial_name; + field_trial_name << "WebRTC-Aec3PreEchoConfiguration/threshold:" + << threshold_in << ",mode:" << mode_in << "/"; + webrtc::test::ScopedFieldTrials field_trials(field_trial_name.str()); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + MatchedFilter matched_filter( + &data_dumper, DetectOptimization(), + kBlockSize / config.delay.down_sampling_factor, + aec3::kWindowSizeSubBlocks, aec3::kNumMatchedFilters, + aec3::kAlignmentShiftSubBlocks, + config.render_levels.poor_excitation_render_limit, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, + config.delay.detect_pre_echo); + + auto& pre_echo_config = matched_filter.GetPreEchoConfiguration(); + EXPECT_EQ(pre_echo_config.threshold, threshold_in); + EXPECT_EQ(pre_echo_config.mode, mode_in); +} + +TEST(MatchedFilterFieldTrialTest, WrongPreEchoConfigurationTest) { + constexpr float kDefaultThreshold = 0.5f; + constexpr int kDefaultMode = 0; + float threshold_in = -0.1f; + int mode_in = 5; + rtc::StringBuilder field_trial_name; + field_trial_name << "WebRTC-Aec3PreEchoConfiguration/threshold:" + << threshold_in << ",mode:" << mode_in << "/"; + webrtc::test::ScopedFieldTrials field_trials(field_trial_name.str()); + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + MatchedFilter matched_filter( + &data_dumper, DetectOptimization(), + kBlockSize / config.delay.down_sampling_factor, + aec3::kWindowSizeSubBlocks, aec3::kNumMatchedFilters, + aec3::kAlignmentShiftSubBlocks, + config.render_levels.poor_excitation_render_limit, + config.delay.delay_estimate_smoothing, + config.delay.delay_estimate_smoothing_delay_found, + config.delay.delay_candidate_detection_threshold, + config.delay.detect_pre_echo); + + auto& pre_echo_config = matched_filter.GetPreEchoConfiguration(); + EXPECT_EQ(pre_echo_config.threshold, kDefaultThreshold); + EXPECT_EQ(pre_echo_config.mode, kDefaultMode); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.cc b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.cc new file mode 100644 index 0000000000..c5c33dbd68 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.cc @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/mock/mock_block_processor.h" + +namespace webrtc { +namespace test { + +MockBlockProcessor::MockBlockProcessor() = default; +MockBlockProcessor::~MockBlockProcessor() = default; + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.h b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.h new file mode 100644 index 0000000000..c9ae38c4aa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_block_processor.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_BLOCK_PROCESSOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_BLOCK_PROCESSOR_H_ + +#include + +#include "modules/audio_processing/aec3/block_processor.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockBlockProcessor : public BlockProcessor { + public: + MockBlockProcessor(); + virtual ~MockBlockProcessor(); + + MOCK_METHOD(void, + ProcessCapture, + (bool level_change, + bool saturated_microphone_signal, + Block* linear_output, + Block* capture_block), + (override)); + MOCK_METHOD(void, BufferRender, (const Block& block), (override)); + MOCK_METHOD(void, + UpdateEchoLeakageStatus, + (bool leakage_detected), + (override)); + MOCK_METHOD(void, + GetMetrics, + (EchoControl::Metrics * metrics), + (const, override)); + MOCK_METHOD(void, SetAudioBufferDelay, (int delay_ms), (override)); + MOCK_METHOD(void, + SetCaptureOutputUsage, + (bool capture_output_used), + (override)); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_BLOCK_PROCESSOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.cc b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.cc new file mode 100644 index 0000000000..b903bf0785 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.cc @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/mock/mock_echo_remover.h" + +namespace webrtc { +namespace test { + +MockEchoRemover::MockEchoRemover() = default; +MockEchoRemover::~MockEchoRemover() = default; + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h new file mode 100644 index 0000000000..31f075ef0a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_ECHO_REMOVER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_ECHO_REMOVER_H_ + +#include + +#include "absl/types/optional.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/echo_remover.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockEchoRemover : public EchoRemover { + public: + MockEchoRemover(); + virtual ~MockEchoRemover(); + + MOCK_METHOD(void, + ProcessCapture, + (EchoPathVariability echo_path_variability, + bool capture_signal_saturation, + const absl::optional& delay_estimate, + RenderBuffer* render_buffer, + Block* linear_output, + Block* capture), + (override)); + MOCK_METHOD(void, + UpdateEchoLeakageStatus, + (bool leakage_detected), + (override)); + MOCK_METHOD(void, + GetMetrics, + (EchoControl::Metrics * metrics), + (const, override)); + MOCK_METHOD(void, + SetCaptureOutputUsage, + (bool capture_output_used), + (override)); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_ECHO_REMOVER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc new file mode 100644 index 0000000000..d4ad09b4bc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/mock/mock_render_delay_buffer.h" + +namespace webrtc { +namespace test { + +MockRenderDelayBuffer::MockRenderDelayBuffer(int sample_rate_hz, + size_t num_channels) + : block_buffer_(GetRenderDelayBufferSize(4, 4, 12), + NumBandsForRate(sample_rate_hz), + num_channels), + spectrum_buffer_(block_buffer_.buffer.size(), num_channels), + fft_buffer_(block_buffer_.buffer.size(), num_channels), + render_buffer_(&block_buffer_, &spectrum_buffer_, &fft_buffer_), + downsampled_render_buffer_(GetDownSampledBufferSize(4, 4)) { + ON_CALL(*this, GetRenderBuffer()) + .WillByDefault( + ::testing::Invoke(this, &MockRenderDelayBuffer::FakeGetRenderBuffer)); + ON_CALL(*this, GetDownsampledRenderBuffer()) + .WillByDefault(::testing::Invoke( + this, &MockRenderDelayBuffer::FakeGetDownsampledRenderBuffer)); +} + +MockRenderDelayBuffer::~MockRenderDelayBuffer() = default; + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h new file mode 100644 index 0000000000..c17fd62caa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_BUFFER_H_ + +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockRenderDelayBuffer : public RenderDelayBuffer { + public: + MockRenderDelayBuffer(int sample_rate_hz, size_t num_channels); + virtual ~MockRenderDelayBuffer(); + + MOCK_METHOD(void, Reset, (), (override)); + MOCK_METHOD(RenderDelayBuffer::BufferingEvent, + Insert, + (const Block& block), + (override)); + MOCK_METHOD(void, HandleSkippedCaptureProcessing, (), (override)); + MOCK_METHOD(RenderDelayBuffer::BufferingEvent, + PrepareCaptureProcessing, + (), + (override)); + MOCK_METHOD(bool, AlignFromDelay, (size_t delay), (override)); + MOCK_METHOD(void, AlignFromExternalDelay, (), (override)); + MOCK_METHOD(size_t, Delay, (), (const, override)); + MOCK_METHOD(size_t, MaxDelay, (), (const, override)); + MOCK_METHOD(RenderBuffer*, GetRenderBuffer, (), (override)); + MOCK_METHOD(const DownsampledRenderBuffer&, + GetDownsampledRenderBuffer, + (), + (const, override)); + MOCK_METHOD(void, SetAudioBufferDelay, (int delay_ms), (override)); + MOCK_METHOD(bool, HasReceivedBufferDelay, (), (override)); + + private: + RenderBuffer* FakeGetRenderBuffer() { return &render_buffer_; } + const DownsampledRenderBuffer& FakeGetDownsampledRenderBuffer() const { + return downsampled_render_buffer_; + } + BlockBuffer block_buffer_; + SpectrumBuffer spectrum_buffer_; + FftBuffer fft_buffer_; + RenderBuffer render_buffer_; + DownsampledRenderBuffer 
downsampled_render_buffer_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.cc b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.cc new file mode 100644 index 0000000000..4ae2af96bf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.cc @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/mock/mock_render_delay_controller.h" + +namespace webrtc { +namespace test { + +MockRenderDelayController::MockRenderDelayController() = default; +MockRenderDelayController::~MockRenderDelayController() = default; + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h new file mode 100644 index 0000000000..14d499dd28 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_CONTROLLER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_CONTROLLER_H_ + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_controller.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockRenderDelayController : public RenderDelayController { + public: + MockRenderDelayController(); + virtual ~MockRenderDelayController(); + + MOCK_METHOD(void, Reset, (bool reset_delay_statistics), (override)); + MOCK_METHOD(void, LogRenderCall, (), (override)); + MOCK_METHOD(absl::optional, + GetDelay, + (const DownsampledRenderBuffer& render_buffer, + size_t render_delay_buffer_delay, + const Block& capture), + (override)); + MOCK_METHOD(bool, HasClockdrift, (), (const, override)); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc new file mode 100644 index 0000000000..7a81ee89ea --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.cc @@ -0,0 +1,60 @@ + +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/moving_average.h" + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace aec3 { + +MovingAverage::MovingAverage(size_t num_elem, size_t mem_len) + : num_elem_(num_elem), + mem_len_(mem_len - 1), + scaling_(1.0f / static_cast(mem_len)), + memory_(num_elem * mem_len_, 0.f), + mem_index_(0) { + RTC_DCHECK(num_elem_ > 0); + RTC_DCHECK(mem_len > 0); +} + +MovingAverage::~MovingAverage() = default; + +void MovingAverage::Average(rtc::ArrayView input, + rtc::ArrayView output) { + RTC_DCHECK(input.size() == num_elem_); + RTC_DCHECK(output.size() == num_elem_); + + // Sum all contributions. + std::copy(input.begin(), input.end(), output.begin()); + for (auto i = memory_.begin(); i < memory_.end(); i += num_elem_) { + std::transform(i, i + num_elem_, output.begin(), output.begin(), + std::plus()); + } + + // Divide by mem_len_. + for (float& o : output) { + o *= scaling_; + } + + // Update memory. + if (mem_len_ > 0) { + std::copy(input.begin(), input.end(), + memory_.begin() + mem_index_ * num_elem_); + mem_index_ = (mem_index_ + 1) % mem_len_; + } +} + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.h b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.h new file mode 100644 index 0000000000..913d78519c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_ + +#include + +#include + +#include "api/array_view.h" + +namespace webrtc { +namespace aec3 { + +class MovingAverage { + public: + // Creates an instance of MovingAverage that accepts inputs of length num_elem + // and averages over mem_len inputs. + MovingAverage(size_t num_elem, size_t mem_len); + ~MovingAverage(); + + // Computes the average of input and mem_len-1 previous inputs and stores the + // result in output. + void Average(rtc::ArrayView input, rtc::ArrayView output); + + private: + const size_t num_elem_; + const size_t mem_len_; + const float scaling_; + std::vector memory_; + size_t mem_index_; +}; + +} // namespace aec3 +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/moving_average_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average_unittest.cc new file mode 100644 index 0000000000..84ba9cbc5b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/moving_average_unittest.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/moving_average.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(MovingAverage, Average) { + constexpr size_t num_elem = 4; + constexpr size_t mem_len = 3; + constexpr float e = 1e-6f; + aec3::MovingAverage ma(num_elem, mem_len); + std::array data1 = {1, 2, 3, 4}; + std::array data2 = {5, 1, 9, 7}; + std::array data3 = {3, 3, 5, 6}; + std::array data4 = {8, 4, 2, 1}; + std::array output; + + ma.Average(data1, output); + EXPECT_NEAR(output[0], data1[0] / 3.0f, e); + EXPECT_NEAR(output[1], data1[1] / 3.0f, e); + EXPECT_NEAR(output[2], data1[2] / 3.0f, e); + EXPECT_NEAR(output[3], data1[3] / 3.0f, e); + + ma.Average(data2, output); + EXPECT_NEAR(output[0], (data1[0] + data2[0]) / 3.0f, e); + EXPECT_NEAR(output[1], (data1[1] + data2[1]) / 3.0f, e); + EXPECT_NEAR(output[2], (data1[2] + data2[2]) / 3.0f, e); + EXPECT_NEAR(output[3], (data1[3] + data2[3]) / 3.0f, e); + + ma.Average(data3, output); + EXPECT_NEAR(output[0], (data1[0] + data2[0] + data3[0]) / 3.0f, e); + EXPECT_NEAR(output[1], (data1[1] + data2[1] + data3[1]) / 3.0f, e); + EXPECT_NEAR(output[2], (data1[2] + data2[2] + data3[2]) / 3.0f, e); + EXPECT_NEAR(output[3], (data1[3] + data2[3] + data3[3]) / 3.0f, e); + + ma.Average(data4, output); + EXPECT_NEAR(output[0], (data2[0] + data3[0] + data4[0]) / 3.0f, e); + EXPECT_NEAR(output[1], (data2[1] + data3[1] + data4[1]) / 3.0f, e); + EXPECT_NEAR(output[2], (data2[2] + data3[2] + data4[2]) / 3.0f, e); + EXPECT_NEAR(output[3], (data2[3] + data3[3] + data4[3]) / 3.0f, e); +} + +TEST(MovingAverage, PassThrough) { + constexpr size_t num_elem = 4; + constexpr size_t mem_len = 1; + constexpr float e = 1e-6f; + aec3::MovingAverage ma(num_elem, mem_len); + std::array data1 = {1, 2, 3, 4}; + std::array data2 = {5, 1, 9, 7}; + std::array data3 = {3, 3, 5, 6}; + std::array data4 = {8, 4, 2, 1}; + std::array output; + + ma.Average(data1, output); + EXPECT_NEAR(output[0], data1[0], e); + EXPECT_NEAR(output[1], data1[1], e); + EXPECT_NEAR(output[2], data1[2], e); + EXPECT_NEAR(output[3], data1[3], e); + + ma.Average(data2, output); + EXPECT_NEAR(output[0], data2[0], e); + EXPECT_NEAR(output[1], data2[1], e); + EXPECT_NEAR(output[2], data2[2], e); + EXPECT_NEAR(output[3], data2[3], e); + + ma.Average(data3, output); + EXPECT_NEAR(output[0], data3[0], e); + EXPECT_NEAR(output[1], data3[1], e); + EXPECT_NEAR(output[2], data3[2], e); + EXPECT_NEAR(output[3], data3[3], e); + + ma.Average(data4, output); + EXPECT_NEAR(output[0], data4[0], e); + EXPECT_NEAR(output[1], data4[1], e); + EXPECT_NEAR(output[2], data4[2], e); + EXPECT_NEAR(output[3], data4[3], e); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc new file mode 100644 index 0000000000..98068964d9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.cc @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/multi_channel_content_detector.h" + +#include + +#include "rtc_base/checks.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +constexpr int kNumFramesPerSecond = 100; + +// Compares the left and right channels in the render `frame` to determine +// whether the signal is a proper stereo signal. To allow for differences +// introduced by hardware drivers, a threshold `detection_threshold` is used for +// the detection. +bool HasStereoContent(const std::vector>>& frame, + float detection_threshold) { + if (frame[0].size() < 2) { + return false; + } + + for (size_t band = 0; band < frame.size(); ++band) { + for (size_t k = 0; k < frame[band][0].size(); ++k) { + if (std::fabs(frame[band][0][k] - frame[band][1][k]) > + detection_threshold) { + return true; + } + } + } + return false; +} + +// In order to avoid logging metrics for very short lifetimes that are unlikely +// to reflect real calls and that may dilute the "real" data, logging is limited +// to lifetimes of at leats 5 seconds. +constexpr int kMinNumberOfFramesRequiredToLogMetrics = 500; + +// Continuous metrics are logged every 10 seconds. +constexpr int kFramesPer10Seconds = 1000; + +} // namespace + +MultiChannelContentDetector::MetricsLogger::MetricsLogger() {} + +MultiChannelContentDetector::MetricsLogger::~MetricsLogger() { + if (frame_counter_ < kMinNumberOfFramesRequiredToLogMetrics) + return; + + RTC_HISTOGRAM_BOOLEAN( + "WebRTC.Audio.EchoCanceller.PersistentMultichannelContentEverDetected", + any_multichannel_content_detected_ ? 1 : 0); +} + +void MultiChannelContentDetector::MetricsLogger::Update( + bool persistent_multichannel_content_detected) { + ++frame_counter_; + if (persistent_multichannel_content_detected) { + any_multichannel_content_detected_ = true; + ++persistent_multichannel_frame_counter_; + } + + if (frame_counter_ < kMinNumberOfFramesRequiredToLogMetrics) + return; + if (frame_counter_ % kFramesPer10Seconds != 0) + return; + const bool mostly_multichannel_last_10_seconds = + (persistent_multichannel_frame_counter_ >= kFramesPer10Seconds / 2); + RTC_HISTOGRAM_BOOLEAN( + "WebRTC.Audio.EchoCanceller.ProcessingPersistentMultichannelContent", + mostly_multichannel_last_10_seconds ? 1 : 0); + + persistent_multichannel_frame_counter_ = 0; +} + +MultiChannelContentDetector::MultiChannelContentDetector( + bool detect_stereo_content, + int num_render_input_channels, + float detection_threshold, + int stereo_detection_timeout_threshold_seconds, + float stereo_detection_hysteresis_seconds) + : detect_stereo_content_(detect_stereo_content), + detection_threshold_(detection_threshold), + detection_timeout_threshold_frames_( + stereo_detection_timeout_threshold_seconds > 0 + ? absl::make_optional(stereo_detection_timeout_threshold_seconds * + kNumFramesPerSecond) + : absl::nullopt), + stereo_detection_hysteresis_frames_(static_cast( + stereo_detection_hysteresis_seconds * kNumFramesPerSecond)), + metrics_logger_((detect_stereo_content && num_render_input_channels > 1) + ? 
std::make_unique() + : nullptr), + persistent_multichannel_content_detected_( + !detect_stereo_content && num_render_input_channels > 1) {} + +bool MultiChannelContentDetector::UpdateDetection( + const std::vector>>& frame) { + if (!detect_stereo_content_) { + RTC_DCHECK_EQ(frame[0].size() > 1, + persistent_multichannel_content_detected_); + return false; + } + + const bool previous_persistent_multichannel_content_detected = + persistent_multichannel_content_detected_; + const bool stereo_detected_in_frame = + HasStereoContent(frame, detection_threshold_); + + consecutive_frames_with_stereo_ = + stereo_detected_in_frame ? consecutive_frames_with_stereo_ + 1 : 0; + frames_since_stereo_detected_last_ = + stereo_detected_in_frame ? 0 : frames_since_stereo_detected_last_ + 1; + + // Detect persistent multichannel content. + if (consecutive_frames_with_stereo_ > stereo_detection_hysteresis_frames_) { + persistent_multichannel_content_detected_ = true; + } + if (detection_timeout_threshold_frames_.has_value() && + frames_since_stereo_detected_last_ >= + *detection_timeout_threshold_frames_) { + persistent_multichannel_content_detected_ = false; + } + + // Detect temporary multichannel content. + temporary_multichannel_content_detected_ = + persistent_multichannel_content_detected_ ? false + : stereo_detected_in_frame; + + if (metrics_logger_) + metrics_logger_->Update(persistent_multichannel_content_detected_); + + return previous_persistent_multichannel_content_detected != + persistent_multichannel_content_detected_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.h b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.h new file mode 100644 index 0000000000..1742c5fc17 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_ + +#include +#include + +#include +#include + +#include "absl/types/optional.h" + +namespace webrtc { + +// Analyzes audio content to determine whether the contained audio is proper +// multichannel, or only upmixed mono. To allow for differences introduced by +// hardware drivers, a threshold `detection_threshold` is used for the +// detection. +// Logs metrics continously and upon destruction. +class MultiChannelContentDetector { + public: + // If |stereo_detection_timeout_threshold_seconds| <= 0, no timeout is + // applied: Once multichannel is detected, the detector remains in that state + // for its lifetime. + MultiChannelContentDetector(bool detect_stereo_content, + int num_render_input_channels, + float detection_threshold, + int stereo_detection_timeout_threshold_seconds, + float stereo_detection_hysteresis_seconds); + + // Compares the left and right channels in the render `frame` to determine + // whether the signal is a proper multichannel signal. 
Returns a bool + // indicating whether a change in the proper multichannel content was + // detected. + bool UpdateDetection( + const std::vector>>& frame); + + bool IsProperMultiChannelContentDetected() const { + return persistent_multichannel_content_detected_; + } + + bool IsTemporaryMultiChannelContentDetected() const { + return temporary_multichannel_content_detected_; + } + + private: + // Tracks and logs metrics for the amount of multichannel content detected. + class MetricsLogger { + public: + MetricsLogger(); + + // The destructor logs call summary statistics. + ~MetricsLogger(); + + // Updates and logs metrics. + void Update(bool persistent_multichannel_content_detected); + + private: + int frame_counter_ = 0; + + // Counts the number of frames of persistent multichannel audio observed + // during the current metrics collection interval. + int persistent_multichannel_frame_counter_ = 0; + + // Indicates whether persistent multichannel content has ever been detected. + bool any_multichannel_content_detected_ = false; + }; + + const bool detect_stereo_content_; + const float detection_threshold_; + const absl::optional detection_timeout_threshold_frames_; + const int stereo_detection_hysteresis_frames_; + + // Collects and reports metrics on the amount of multichannel content + // detected. Only created if |num_render_input_channels| > 1 and + // |detect_stereo_content_| is true. + const std::unique_ptr metrics_logger_; + + bool persistent_multichannel_content_detected_; + bool temporary_multichannel_content_detected_ = false; + int64_t frames_since_stereo_detected_last_ = 0; + int64_t consecutive_frames_with_stereo_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc new file mode 100644 index 0000000000..8d38dd0991 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/multi_channel_content_detector_unittest.cc @@ -0,0 +1,470 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/multi_channel_content_detector.h" + +#include "system_wrappers/include/metrics.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(MultiChannelContentDetector, HandlingOfMono) { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/1, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0, + /*stereo_detection_hysteresis_seconds=*/0.0f); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); +} + +TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/false, + /*num_render_input_channels=*/1, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0, + /*stereo_detection_hysteresis_seconds=*/0.0f); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); +} + +TEST(MultiChannelContentDetector, HandlingOfDetectionOff) { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/false, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0, + /*stereo_detection_hysteresis_seconds=*/0.0f); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f); + + EXPECT_FALSE(mc.UpdateDetection(frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, InitialDetectionOfStereo) { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0, + /*stereo_detection_hysteresis_seconds=*/0.0f); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); +} + +TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0, + /*stereo_detection_hysteresis_seconds=*/0.0f); + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f); + EXPECT_FALSE(mc.UpdateDetection(frame)); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, DetectionWhenStereo) { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0, + /*stereo_detection_hysteresis_seconds=*/0.0f); + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f); + EXPECT_TRUE(mc.UpdateDetection(frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/0, + /*stereo_detection_hysteresis_seconds=*/0.0f); + 
std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f); + EXPECT_FALSE(mc.UpdateDetection(frame)); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); + + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 101.0f); + + EXPECT_TRUE(mc.UpdateDetection(frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) { + constexpr float kThreshold = 1.0f; + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/kThreshold, + /*stereo_detection_timeout_threshold_seconds=*/0, + /*stereo_detection_hysteresis_seconds=*/0.0f); + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold); + + EXPECT_FALSE(mc.UpdateDetection(frame)); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) { + constexpr float kThreshold = 1.0f; + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/kThreshold, + /*stereo_detection_timeout_threshold_seconds=*/0, + /*stereo_detection_hysteresis_seconds=*/0.0f); + std::vector>> frame( + 1, std::vector>(2, std::vector(160, 0.0f))); + std::fill(frame[0][0].begin(), frame[0][0].end(), 100.0f); + std::fill(frame[0][1].begin(), frame[0][1].end(), 100.0f + kThreshold + 0.1f); + + EXPECT_TRUE(mc.UpdateDetection(frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + + EXPECT_FALSE(mc.UpdateDetection(frame)); +} + +class MultiChannelContentDetectorTimeoutBehavior + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannelContentDetector, + MultiChannelContentDetectorTimeoutBehavior, + ::testing::Combine(::testing::Values(false, true), + ::testing::Values(0, 1, 10))); + +TEST_P(MultiChannelContentDetectorTimeoutBehavior, + TimeOutBehaviorForNonTrueStereo) { + constexpr int kNumFramesPerSecond = 100; + const bool detect_stereo_content = std::get<0>(GetParam()); + const int stereo_detection_timeout_threshold_seconds = + std::get<1>(GetParam()); + const int stereo_detection_timeout_threshold_frames = + stereo_detection_timeout_threshold_seconds * kNumFramesPerSecond; + + MultiChannelContentDetector mc(detect_stereo_content, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + stereo_detection_timeout_threshold_seconds, + /*stereo_detection_hysteresis_seconds=*/0.0f); + std::vector>> true_stereo_frame = { + {std::vector(160, 100.0f), std::vector(160, 101.0f)}}; + + std::vector>> fake_stereo_frame = { + {std::vector(160, 100.0f), std::vector(160, 100.0f)}}; + + // Pass fake stereo frames and verify the content detection. + for (int k = 0; k < 10; ++k) { + EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame)); + if (detect_stereo_content) { + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); + } else { + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + } + } + + // Pass a true stereo frame and verify that it is properly detected. 
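+  // UpdateDetection() returns true only on the frame where the persistent
+  // detection state changes; with detection disabled, the state is treated as
+  // multichannel from the start, so no change is reported below.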
+  if (detect_stereo_content) {
+    EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame));
+  } else {
+    EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
+  }
+  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+
+  // Pass fake stereo frames until any timeouts are about to occur.
+  for (int k = 0; k < stereo_detection_timeout_threshold_frames - 1; ++k) {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+  }
+
+  // Pass a fake stereo frame and verify that any timeouts properly occur.
+  if (detect_stereo_content && stereo_detection_timeout_threshold_frames > 0) {
+    EXPECT_TRUE(mc.UpdateDetection(fake_stereo_frame));
+    EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+  } else {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+  }
+
+  // Pass fake stereo frames and verify the behavior after any timeout.
+  for (int k = 0; k < 10; ++k) {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    if (detect_stereo_content &&
+        stereo_detection_timeout_threshold_frames > 0) {
+      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+    } else {
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    }
+  }
+}
+
+class MultiChannelContentDetectorHysteresisBehavior
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<bool, float>> {};
+
+INSTANTIATE_TEST_SUITE_P(
+    MultiChannelContentDetector,
+    MultiChannelContentDetectorHysteresisBehavior,
+    ::testing::Combine(::testing::Values(false, true),
+                       ::testing::Values(0.0f, 0.1f, 0.2f)));
+
+TEST_P(MultiChannelContentDetectorHysteresisBehavior,
+       PeriodBeforeStereoDetectionIsTriggered) {
+  constexpr int kNumFramesPerSecond = 100;
+  const bool detect_stereo_content = std::get<0>(GetParam());
+  const float stereo_detection_hysteresis_seconds = std::get<1>(GetParam());
+  const int stereo_detection_hysteresis_frames =
+      stereo_detection_hysteresis_seconds * kNumFramesPerSecond;
+
+  MultiChannelContentDetector mc(
+      detect_stereo_content,
+      /*num_render_input_channels=*/2,
+      /*detection_threshold=*/0.0f,
+      /*stereo_detection_timeout_threshold_seconds=*/0,
+      stereo_detection_hysteresis_seconds);
+  std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
+      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
+
+  std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
+      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};
+
+  // Pass fake stereo frames and verify the content detection.
+  for (int k = 0; k < 10; ++k) {
+    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
+    if (detect_stereo_content) {
+      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
+    } else {
+      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
+    }
+    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected());
+  }
+
+  // Pass two true stereo frames and verify that they are properly detected.
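+  // With a nonzero hysteresis, the first true-stereo frames only raise the
+  // temporary flag; the persistent flag is raised once true stereo has been
+  // observed for stereo_detection_hysteresis_frames consecutive frames.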
+ ASSERT_TRUE(stereo_detection_hysteresis_frames > 2 || + stereo_detection_hysteresis_frames == 0); + for (int k = 0; k < 2; ++k) { + if (detect_stereo_content) { + if (stereo_detection_hysteresis_seconds == 0.0f) { + if (k == 0) { + EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame)); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + } + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); + EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetected()); + } + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } + } + + if (stereo_detection_hysteresis_seconds == 0.0f) { + return; + } + + // Pass true stereo frames until any timeouts are about to occur. + for (int k = 0; k < stereo_detection_hysteresis_frames - 3; ++k) { + if (detect_stereo_content) { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_FALSE(mc.IsProperMultiChannelContentDetected()); + EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } + } + + // Pass a true stereo frame and verify that it is properly detected. + if (detect_stereo_content) { + EXPECT_TRUE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } + + // Pass an additional true stereo frame and verify that it is properly + // detected. + if (detect_stereo_content) { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } + + // Pass a fake stereo frame and verify that it is properly detected. + if (detect_stereo_content) { + EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } else { + EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame)); + EXPECT_TRUE(mc.IsProperMultiChannelContentDetected()); + EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected()); + } +} + +class MultiChannelContentDetectorMetricsDisabled + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P( + /*no prefix*/, + MultiChannelContentDetectorMetricsDisabled, + ::testing::Values(std::tuple(false, 2), + std::tuple(true, 1))); + +// Test that no metrics are logged when they are clearly uninteresting and would +// dilute relevant data: when the reference audio is single channel, or when +// dynamic detection is disabled. 
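+// In both parameterizations the detector is run for 20 seconds, which would
+// otherwise be long enough for both the periodic and the lifetime metrics to
+// be logged.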
+TEST_P(MultiChannelContentDetectorMetricsDisabled, ReportsNoMetrics) { + metrics::Reset(); + constexpr int kNumFramesPerSecond = 100; + const bool detect_stereo_content = std::get<0>(GetParam()); + const int channel_count = std::get<1>(GetParam()); + std::vector>> audio_frame = { + std::vector>(channel_count, + std::vector(160, 100.0f))}; + { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/detect_stereo_content, + /*num_render_input_channels=*/channel_count, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/1, + /*stereo_detection_hysteresis_seconds=*/0.0f); + for (int k = 0; k < 20 * kNumFramesPerSecond; ++k) { + mc.UpdateDetection(audio_frame); + } + } + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent")); + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected")); +} + +// Tests that after 3 seconds, no metrics are reported. +TEST(MultiChannelContentDetectorMetrics, ReportsNoMetricsForShortLifetime) { + metrics::Reset(); + constexpr int kNumFramesPerSecond = 100; + constexpr int kTooFewFramesToLogMetrics = 3 * kNumFramesPerSecond; + std::vector>> audio_frame = { + std::vector>(2, std::vector(160, 100.0f))}; + { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/1, + /*stereo_detection_hysteresis_seconds=*/0.0f); + for (int k = 0; k < kTooFewFramesToLogMetrics; ++k) { + mc.UpdateDetection(audio_frame); + } + } + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent")); + EXPECT_METRIC_EQ( + 0, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected")); +} + +// Tests that after 25 seconds, metrics are reported. +TEST(MultiChannelContentDetectorMetrics, ReportsMetrics) { + metrics::Reset(); + constexpr int kNumFramesPerSecond = 100; + std::vector>> true_stereo_frame = { + {std::vector(160, 100.0f), std::vector(160, 101.0f)}}; + std::vector>> fake_stereo_frame = { + {std::vector(160, 100.0f), std::vector(160, 100.0f)}}; + { + MultiChannelContentDetector mc( + /*detect_stereo_content=*/true, + /*num_render_input_channels=*/2, + /*detection_threshold=*/0.0f, + /*stereo_detection_timeout_threshold_seconds=*/1, + /*stereo_detection_hysteresis_seconds=*/0.0f); + for (int k = 0; k < 10 * kNumFramesPerSecond; ++k) { + mc.UpdateDetection(true_stereo_frame); + } + for (int k = 0; k < 15 * kNumFramesPerSecond; ++k) { + mc.UpdateDetection(fake_stereo_frame); + } + } + // After 10 seconds of true stereo and the remainder fake stereo, we expect + // one lifetime metric sample (multichannel detected) and two periodic samples + // (one multichannel, one mono). + + // Check lifetime metric. + EXPECT_METRIC_EQ( + 1, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected")); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Audio.EchoCanceller." + "PersistentMultichannelContentEverDetected", 1)); + + // Check periodic metric. + EXPECT_METRIC_EQ( + 2, metrics::NumSamples("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent")); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Audio.EchoCanceller." + "ProcessingPersistentMultichannelContent", 0)); + EXPECT_METRIC_EQ( + 1, metrics::NumEvents("WebRTC.Audio.EchoCanceller." 
+ "ProcessingPersistentMultichannelContent", 1)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/nearend_detector.h b/third_party/libwebrtc/modules/audio_processing/aec3/nearend_detector.h new file mode 100644 index 0000000000..0d8a06b2cd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/nearend_detector.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_ + +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { +// Class for selecting whether the suppressor is in the nearend or echo state. +class NearendDetector { + public: + virtual ~NearendDetector() {} + + // Returns whether the current state is the nearend state. + virtual bool IsNearendState() const = 0; + + // Updates the state selection based on latest spectral estimates. + virtual void Update( + rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + bool initial_state) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc new file mode 100644 index 0000000000..8e391d6fa6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/refined_filter_update_gain.h" + +#include +#include + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" +#include "modules/audio_processing/aec3/subtractor_output.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr float kHErrorInitial = 10000.f; +constexpr int kPoorExcitationCounterInitial = 1000; + +} // namespace + +std::atomic RefinedFilterUpdateGain::instance_count_(0); + +RefinedFilterUpdateGain::RefinedFilterUpdateGain( + const EchoCanceller3Config::Filter::RefinedConfiguration& config, + size_t config_change_duration_blocks) + : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), + config_change_duration_blocks_( + static_cast(config_change_duration_blocks)), + poor_excitation_counter_(kPoorExcitationCounterInitial) { + SetConfig(config, true); + H_error_.fill(kHErrorInitial); + RTC_DCHECK_LT(0, config_change_duration_blocks_); + one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_; +} + +RefinedFilterUpdateGain::~RefinedFilterUpdateGain() {} + +void RefinedFilterUpdateGain::HandleEchoPathChange( + const EchoPathVariability& echo_path_variability) { + if (echo_path_variability.gain_change) { + // TODO(bugs.webrtc.org/9526) Handle gain changes. + } + + if (echo_path_variability.delay_change != + EchoPathVariability::DelayAdjustment::kNone) { + H_error_.fill(kHErrorInitial); + } + + if (!echo_path_variability.gain_change) { + poor_excitation_counter_ = kPoorExcitationCounterInitial; + call_counter_ = 0; + } +} + +void RefinedFilterUpdateGain::Compute( + const std::array& render_power, + const RenderSignalAnalyzer& render_signal_analyzer, + const SubtractorOutput& subtractor_output, + rtc::ArrayView erl, + size_t size_partitions, + bool saturated_capture_signal, + bool disallow_leakage_diverged, + FftData* gain_fft) { + RTC_DCHECK(gain_fft); + // Introducing shorter notation to improve readability. + const FftData& E_refined = subtractor_output.E_refined; + const auto& E2_refined = subtractor_output.E2_refined; + const auto& E2_coarse = subtractor_output.E2_coarse; + FftData* G = gain_fft; + const auto& X2 = render_power; + + ++call_counter_; + + UpdateCurrentConfig(); + + if (render_signal_analyzer.PoorSignalExcitation()) { + poor_excitation_counter_ = 0; + } + + // Do not update the filter if the render is not sufficiently excited. + if (++poor_excitation_counter_ < size_partitions || + saturated_capture_signal || call_counter_ <= size_partitions) { + G->re.fill(0.f); + G->im.fill(0.f); + } else { + // Corresponds to WGN of power -39 dBFS. + std::array mu; + // mu = H_error / (0.5* H_error* X2 + n * E2). + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (X2[k] >= current_config_.noise_gate) { + mu[k] = H_error_[k] / + (0.5f * H_error_[k] * X2[k] + size_partitions * E2_refined[k]); + } else { + mu[k] = 0.f; + } + } + + // Avoid updating the filter close to narrow bands in the render signals. + render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu); + + // H_error = H_error - 0.5 * mu * X2 * H_error. + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + H_error_[k] -= 0.5f * mu[k] * X2[k] * H_error_[k]; + } + + // G = mu * E. 
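+    // (Scaling the error FFT by mu per bin is the NLMS update step expressed
+    // in the frequency domain.)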
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + G->re[k] = mu[k] * E_refined.re[k]; + G->im[k] = mu[k] * E_refined.im[k]; + } + } + + // H_error = H_error + factor * erl. + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (E2_refined[k] <= E2_coarse[k] || disallow_leakage_diverged) { + H_error_[k] += current_config_.leakage_converged * erl[k]; + } else { + H_error_[k] += current_config_.leakage_diverged * erl[k]; + } + + H_error_[k] = std::max(H_error_[k], current_config_.error_floor); + H_error_[k] = std::min(H_error_[k], current_config_.error_ceil); + } + + data_dumper_->DumpRaw("aec3_refined_gain_H_error", H_error_); +} + +void RefinedFilterUpdateGain::UpdateCurrentConfig() { + RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_); + if (config_change_counter_ > 0) { + if (--config_change_counter_ > 0) { + auto average = [](float from, float to, float from_weight) { + return from * from_weight + to * (1.f - from_weight); + }; + + float change_factor = + config_change_counter_ * one_by_config_change_duration_blocks_; + + current_config_.leakage_converged = + average(old_target_config_.leakage_converged, + target_config_.leakage_converged, change_factor); + current_config_.leakage_diverged = + average(old_target_config_.leakage_diverged, + target_config_.leakage_diverged, change_factor); + current_config_.error_floor = + average(old_target_config_.error_floor, target_config_.error_floor, + change_factor); + current_config_.error_ceil = + average(old_target_config_.error_ceil, target_config_.error_ceil, + change_factor); + current_config_.noise_gate = + average(old_target_config_.noise_gate, target_config_.noise_gate, + change_factor); + } else { + current_config_ = old_target_config_ = target_config_; + } + } + RTC_DCHECK_LE(0, config_change_counter_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.h b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.h new file mode 100644 index 0000000000..1a68ebc296 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_ + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +class AdaptiveFirFilter; +class ApmDataDumper; +struct EchoPathVariability; +struct FftData; +class RenderSignalAnalyzer; +struct SubtractorOutput; + +// Provides functionality for computing the adaptive gain for the refined +// filter. 
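+//
+// Roughly, for each frequency bin k the gain is computed as
+//   mu[k] = H_error[k] / (0.5 * H_error[k] * X2[k] + P * E2_refined[k])
+//   G[k]  = mu[k] * E_refined[k],
+// where X2 is the render power, P is the number of filter partitions, and
+// H_error is a running estimate of the filter error power; see
+// refined_filter_update_gain.cc for the exact conditions under which the
+// update is applied.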
+class RefinedFilterUpdateGain { + public: + RefinedFilterUpdateGain( + const EchoCanceller3Config::Filter::RefinedConfiguration& config, + size_t config_change_duration_blocks); + ~RefinedFilterUpdateGain(); + + RefinedFilterUpdateGain(const RefinedFilterUpdateGain&) = delete; + RefinedFilterUpdateGain& operator=(const RefinedFilterUpdateGain&) = delete; + + // Takes action in the case of a known echo path change. + void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); + + // Computes the gain. + void Compute(const std::array& render_power, + const RenderSignalAnalyzer& render_signal_analyzer, + const SubtractorOutput& subtractor_output, + rtc::ArrayView erl, + size_t size_partitions, + bool saturated_capture_signal, + bool disallow_leakage_diverged, + FftData* gain_fft); + + // Sets a new config. + void SetConfig( + const EchoCanceller3Config::Filter::RefinedConfiguration& config, + bool immediate_effect) { + if (immediate_effect) { + old_target_config_ = current_config_ = target_config_ = config; + config_change_counter_ = 0; + } else { + old_target_config_ = current_config_; + target_config_ = config; + config_change_counter_ = config_change_duration_blocks_; + } + } + + private: + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + const int config_change_duration_blocks_; + float one_by_config_change_duration_blocks_; + EchoCanceller3Config::Filter::RefinedConfiguration current_config_; + EchoCanceller3Config::Filter::RefinedConfiguration target_config_; + EchoCanceller3Config::Filter::RefinedConfiguration old_target_config_; + std::array H_error_; + size_t poor_excitation_counter_; + size_t call_counter_ = 0; + int config_change_counter_ = 0; + + // Updates the current config towards the target config. + void UpdateCurrentConfig(); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc new file mode 100644 index 0000000000..c77c5b53d5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/refined_filter_update_gain.h" + +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" +#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/coarse_filter_update_gain.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" +#include "modules/audio_processing/aec3/subtractor_output.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +// Method for performing the simulations needed to test the refined filter +// update gain functionality. +void RunFilterUpdateTest(int num_blocks_to_process, + size_t delay_samples, + int filter_length_blocks, + const std::vector& blocks_with_echo_path_changes, + const std::vector& blocks_with_saturation, + bool use_silent_render_in_second_half, + std::array* e_last_block, + std::array* y_last_block, + FftData* G_last_block) { + ApmDataDumper data_dumper(42); + Aec3Optimization optimization = DetectOptimization(); + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + EchoCanceller3Config config; + config.filter.refined.length_blocks = filter_length_blocks; + config.filter.coarse.length_blocks = filter_length_blocks; + AdaptiveFirFilter refined_filter( + config.filter.refined.length_blocks, config.filter.refined.length_blocks, + config.filter.config_change_duration_blocks, kNumRenderChannels, + optimization, &data_dumper); + AdaptiveFirFilter coarse_filter( + config.filter.coarse.length_blocks, config.filter.coarse.length_blocks, + config.filter.config_change_duration_blocks, kNumRenderChannels, + optimization, &data_dumper); + std::vector>> H2( + kNumCaptureChannels, std::vector>( + refined_filter.max_filter_size_partitions(), + std::array())); + for (auto& H2_ch : H2) { + for (auto& H2_k : H2_ch) { + H2_k.fill(0.f); + } + } + std::vector> h( + kNumCaptureChannels, + std::vector( + GetTimeDomainLength(refined_filter.max_filter_size_partitions()), + 0.f)); + + Aec3Fft fft; + std::array x_old; + x_old.fill(0.f); + CoarseFilterUpdateGain coarse_gain( + config.filter.coarse, config.filter.config_change_duration_blocks); + RefinedFilterUpdateGain refined_gain( + config.filter.refined, config.filter.config_change_duration_blocks); + Random random_generator(42U); + Block x(kNumBands, kNumRenderChannels); + std::vector y(kBlockSize, 0.f); + config.delay.default_delay = 1; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels)); + AecState aec_state(config, kNumCaptureChannels); + RenderSignalAnalyzer render_signal_analyzer(config); + absl::optional delay_estimate; + std::array s_scratch; + std::array s; + FftData S; + FftData G; + std::vector output(kNumCaptureChannels); + for (auto& subtractor_output : output) { + subtractor_output.Reset(); + } + FftData& E_refined = output[0].E_refined; + FftData E_coarse; + std::vector> Y2(kNumCaptureChannels); + std::vector> E2_refined( + kNumCaptureChannels); + std::array& e_refined = 
output[0].e_refined; + std::array& e_coarse = output[0].e_coarse; + for (auto& Y2_ch : Y2) { + Y2_ch.fill(0.f); + } + + constexpr float kScale = 1.0f / kFftLengthBy2; + + DelayBuffer delay_buffer(delay_samples); + for (int k = 0; k < num_blocks_to_process; ++k) { + // Handle echo path changes. + if (std::find(blocks_with_echo_path_changes.begin(), + blocks_with_echo_path_changes.end(), + k) != blocks_with_echo_path_changes.end()) { + refined_filter.HandleEchoPathChange(); + } + + // Handle saturation. + const bool saturation = + std::find(blocks_with_saturation.begin(), blocks_with_saturation.end(), + k) != blocks_with_saturation.end(); + + // Create the render signal. + if (use_silent_render_in_second_half && k > num_blocks_to_process / 2) { + for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + std::fill(x.begin(band, channel), x.end(band, channel), 0.f); + } + } + } else { + for (int band = 0; band < x.NumChannels(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + RandomizeSampleVector(&random_generator, x.View(band, channel)); + } + } + } + delay_buffer.Delay(x.View(/*band=*/0, /*channel=*/0), y); + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + + render_signal_analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + aec_state.MinDirectPathFilterDelay()); + + // Apply the refined filter. + refined_filter.Filter(*render_delay_buffer->GetRenderBuffer(), &S); + fft.Ifft(S, &s_scratch); + std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2, + e_refined.begin(), + [&](float a, float b) { return a - b * kScale; }); + std::for_each(e_refined.begin(), e_refined.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + fft.ZeroPaddedFft(e_refined, Aec3Fft::Window::kRectangular, &E_refined); + for (size_t k = 0; k < kBlockSize; ++k) { + s[k] = kScale * s_scratch[k + kFftLengthBy2]; + } + + // Apply the coarse filter. + coarse_filter.Filter(*render_delay_buffer->GetRenderBuffer(), &S); + fft.Ifft(S, &s_scratch); + std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2, + e_coarse.begin(), + [&](float a, float b) { return a - b * kScale; }); + std::for_each(e_coarse.begin(), e_coarse.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + fft.ZeroPaddedFft(e_coarse, Aec3Fft::Window::kRectangular, &E_coarse); + + // Compute spectra for future use. + E_refined.Spectrum(Aec3Optimization::kNone, output[0].E2_refined); + E_coarse.Spectrum(Aec3Optimization::kNone, output[0].E2_coarse); + + // Adapt the coarse filter. + std::array render_power; + render_delay_buffer->GetRenderBuffer()->SpectralSum( + coarse_filter.SizePartitions(), &render_power); + coarse_gain.Compute(render_power, render_signal_analyzer, E_coarse, + coarse_filter.SizePartitions(), saturation, &G); + coarse_filter.Adapt(*render_delay_buffer->GetRenderBuffer(), G); + + // Adapt the refined filter + render_delay_buffer->GetRenderBuffer()->SpectralSum( + refined_filter.SizePartitions(), &render_power); + + std::array erl; + ComputeErl(optimization, H2[0], erl); + refined_gain.Compute(render_power, render_signal_analyzer, output[0], erl, + refined_filter.SizePartitions(), saturation, false, + &G); + refined_filter.Adapt(*render_delay_buffer->GetRenderBuffer(), G, &h[0]); + + // Update the delay. 
+ aec_state.HandleEchoPathChange(EchoPathVariability( + false, EchoPathVariability::DelayAdjustment::kNone, false)); + refined_filter.ComputeFrequencyResponse(&H2[0]); + std::copy(output[0].E2_refined.begin(), output[0].E2_refined.end(), + E2_refined[0].begin()); + aec_state.Update(delay_estimate, H2, h, + *render_delay_buffer->GetRenderBuffer(), E2_refined, Y2, + output); + } + + std::copy(e_refined.begin(), e_refined.end(), e_last_block->begin()); + std::copy(y.begin(), y.end(), y_last_block->begin()); + std::copy(G.re.begin(), G.re.end(), G_last_block->re.begin()); + std::copy(G.im.begin(), G.im.end(), G_last_block->im.begin()); +} + +std::string ProduceDebugText(int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "Length: " << filter_length_blocks; + return ss.Release(); +} + +std::string ProduceDebugText(size_t delay, int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "Delay: " << delay << ", "; + ss << ProduceDebugText(filter_length_blocks); + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null output gain parameter works. +TEST(RefinedFilterUpdateGainDeathTest, NullDataOutputGain) { + ApmDataDumper data_dumper(42); + EchoCanceller3Config config; + RenderSignalAnalyzer analyzer(config); + SubtractorOutput output; + RefinedFilterUpdateGain gain(config.filter.refined, + config.filter.config_change_duration_blocks); + std::array render_power; + render_power.fill(0.f); + std::array erl; + erl.fill(0.f); + EXPECT_DEATH( + gain.Compute(render_power, analyzer, output, erl, + config.filter.refined.length_blocks, false, false, nullptr), + ""); +} + +#endif + +// Verifies that the gain formed causes the filter using it to converge. +TEST(RefinedFilterUpdateGain, GainCausesFilterToConverge) { + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + for (size_t filter_length_blocks : {12, 20, 30}) { + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(delay_samples, filter_length_blocks)); + + std::array e; + std::array y; + FftData G; + + RunFilterUpdateTest(600, delay_samples, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G); + + // Verify that the refined filter is able to perform well. + // Use different criteria to take overmodelling into account. + if (filter_length_blocks == 12) { + EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } else { + EXPECT_LT(std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } + } + } +} + +// Verifies that the magnitude of the gain on average decreases for a +// persistently exciting signal. 
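+// This follows from the update in refined_filter_update_gain.cc: as the
+// filter converges, H_error_ is repeatedly scaled down, which shrinks mu and
+// therefore the gain power measured after 250, 500 and 750 blocks below.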
+TEST(RefinedFilterUpdateGain, DecreasingGain) { + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + + std::array e; + std::array y; + FftData G_a; + FftData G_b; + FftData G_c; + std::array G_a_power; + std::array G_b_power; + std::array G_c_power; + + RunFilterUpdateTest(250, 65, 12, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_a); + RunFilterUpdateTest(500, 65, 12, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_b); + RunFilterUpdateTest(750, 65, 12, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_c); + + G_a.Spectrum(Aec3Optimization::kNone, G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, G_b_power); + G_c.Spectrum(Aec3Optimization::kNone, G_c_power); + + EXPECT_GT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); + + EXPECT_GT(std::accumulate(G_b_power.begin(), G_b_power.end(), 0.), + std::accumulate(G_c_power.begin(), G_c_power.end(), 0.)); +} + +// Verifies that the gain is zero when there is saturation and that the internal +// error estimates cause the gain to increase after a period of saturation. +TEST(RefinedFilterUpdateGain, SaturationBehavior) { + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + for (int k = 99; k < 200; ++k) { + blocks_with_saturation.push_back(k); + } + + for (size_t filter_length_blocks : {12, 20, 30}) { + SCOPED_TRACE(ProduceDebugText(filter_length_blocks)); + std::array e; + std::array y; + FftData G_a; + FftData G_b; + FftData G_a_ref; + G_a_ref.re.fill(0.f); + G_a_ref.im.fill(0.f); + + std::array G_a_power; + std::array G_b_power; + + RunFilterUpdateTest(100, 65, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G_a); + + EXPECT_EQ(G_a_ref.re, G_a.re); + EXPECT_EQ(G_a_ref.im, G_a.im); + + RunFilterUpdateTest(99, 65, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G_a); + RunFilterUpdateTest(201, 65, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G_b); + + G_a.Spectrum(Aec3Optimization::kNone, G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, G_b_power); + + EXPECT_LT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); + } +} + +// Verifies that the gain increases after an echo path change. +// TODO(peah): Correct and reactivate this test. 
+TEST(RefinedFilterUpdateGain, DISABLED_EchoPathChangeBehavior) { + for (size_t filter_length_blocks : {12, 20, 30}) { + SCOPED_TRACE(ProduceDebugText(filter_length_blocks)); + std::vector blocks_with_echo_path_changes; + std::vector blocks_with_saturation; + blocks_with_echo_path_changes.push_back(99); + + std::array e; + std::array y; + FftData G_a; + FftData G_b; + std::array G_a_power; + std::array G_b_power; + + RunFilterUpdateTest(100, 65, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G_a); + RunFilterUpdateTest(101, 65, filter_length_blocks, + blocks_with_echo_path_changes, blocks_with_saturation, + false, &e, &y, &G_b); + + G_a.Spectrum(Aec3Optimization::kNone, G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, G_b_power); + + EXPECT_LT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc new file mode 100644 index 0000000000..aa511e2b6b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.cc @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/render_buffer.h" + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +RenderBuffer::RenderBuffer(BlockBuffer* block_buffer, + SpectrumBuffer* spectrum_buffer, + FftBuffer* fft_buffer) + : block_buffer_(block_buffer), + spectrum_buffer_(spectrum_buffer), + fft_buffer_(fft_buffer) { + RTC_DCHECK(block_buffer_); + RTC_DCHECK(spectrum_buffer_); + RTC_DCHECK(fft_buffer_); + RTC_DCHECK_EQ(block_buffer_->buffer.size(), fft_buffer_->buffer.size()); + RTC_DCHECK_EQ(spectrum_buffer_->buffer.size(), fft_buffer_->buffer.size()); + RTC_DCHECK_EQ(spectrum_buffer_->read, fft_buffer_->read); + RTC_DCHECK_EQ(spectrum_buffer_->write, fft_buffer_->write); +} + +RenderBuffer::~RenderBuffer() = default; + +void RenderBuffer::SpectralSum( + size_t num_spectra, + std::array* X2) const { + X2->fill(0.f); + int position = spectrum_buffer_->read; + for (size_t j = 0; j < num_spectra; ++j) { + for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) { + for (size_t k = 0; k < X2->size(); ++k) { + (*X2)[k] += channel_spectrum[k]; + } + } + position = spectrum_buffer_->IncIndex(position); + } +} + +void RenderBuffer::SpectralSums( + size_t num_spectra_shorter, + size_t num_spectra_longer, + std::array* X2_shorter, + std::array* X2_longer) const { + RTC_DCHECK_LE(num_spectra_shorter, num_spectra_longer); + X2_shorter->fill(0.f); + int position = spectrum_buffer_->read; + size_t j = 0; + for (; j < num_spectra_shorter; ++j) { + for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) { + for (size_t k = 0; k < X2_shorter->size(); ++k) { + (*X2_shorter)[k] += channel_spectrum[k]; + } + } + position = spectrum_buffer_->IncIndex(position); + } + std::copy(X2_shorter->begin(), X2_shorter->end(), X2_longer->begin()); + for (; 
j < num_spectra_longer; ++j) { + for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) { + for (size_t k = 0; k < X2_longer->size(); ++k) { + (*X2_longer)[k] += channel_spectrum[k]; + } + } + position = spectrum_buffer_->IncIndex(position); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.h new file mode 100644 index 0000000000..8adc996087 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_ + +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_buffer.h" +#include "modules/audio_processing/aec3/fft_buffer.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// Provides a buffer of the render data for the echo remover. +class RenderBuffer { + public: + RenderBuffer(BlockBuffer* block_buffer, + SpectrumBuffer* spectrum_buffer, + FftBuffer* fft_buffer); + + RenderBuffer() = delete; + RenderBuffer(const RenderBuffer&) = delete; + RenderBuffer& operator=(const RenderBuffer&) = delete; + + ~RenderBuffer(); + + // Get a block. + const Block& GetBlock(int buffer_offset_blocks) const { + int position = + block_buffer_->OffsetIndex(block_buffer_->read, buffer_offset_blocks); + return block_buffer_->buffer[position]; + } + + // Get the spectrum from one of the FFTs in the buffer. + rtc::ArrayView> Spectrum( + int buffer_offset_ffts) const { + int position = spectrum_buffer_->OffsetIndex(spectrum_buffer_->read, + buffer_offset_ffts); + return spectrum_buffer_->buffer[position]; + } + + // Returns the circular fft buffer. + rtc::ArrayView> GetFftBuffer() const { + return fft_buffer_->buffer; + } + + // Returns the current position in the circular buffer. + size_t Position() const { + RTC_DCHECK_EQ(spectrum_buffer_->read, fft_buffer_->read); + RTC_DCHECK_EQ(spectrum_buffer_->write, fft_buffer_->write); + return fft_buffer_->read; + } + + // Returns the sum of the spectrums for a certain number of FFTs. + void SpectralSum(size_t num_spectra, + std::array* X2) const; + + // Returns the sums of the spectrums for two numbers of FFTs. + void SpectralSums(size_t num_spectra_shorter, + size_t num_spectra_longer, + std::array* X2_shorter, + std::array* X2_longer) const; + + // Gets the recent activity seen in the render signal. + bool GetRenderActivity() const { return render_activity_; } + + // Specifies the recent activity seen in the render signal. + void SetRenderActivity(bool activity) { render_activity_ = activity; } + + // Returns the headroom between the write and the read positions in the + // buffer. + int Headroom() const { + // The write and read indices are decreased over time. + int headroom = + fft_buffer_->write < fft_buffer_->read + ? 
fft_buffer_->read - fft_buffer_->write + : fft_buffer_->size - fft_buffer_->write + fft_buffer_->read; + + RTC_DCHECK_LE(0, headroom); + RTC_DCHECK_GE(fft_buffer_->size, headroom); + + return headroom; + } + + // Returns a reference to the spectrum buffer. + const SpectrumBuffer& GetSpectrumBuffer() const { return *spectrum_buffer_; } + + // Returns a reference to the block buffer. + const BlockBuffer& GetBlockBuffer() const { return *block_buffer_; } + + private: + const BlockBuffer* const block_buffer_; + const SpectrumBuffer* const spectrum_buffer_; + const FftBuffer* const fft_buffer_; + bool render_activity_ = false; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_gn/moz.build new file mode 100644 index 0000000000..b7a10f5d7c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + 
DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("render_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_unittest.cc new file mode 100644 index 0000000000..5d9d646e76 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_buffer_unittest.cc @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_buffer.h"
+
+#include <algorithm>
+#include <functional>
+#include <vector>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for non-null fft buffer.
+TEST(RenderBufferDeathTest, NullExternalFftBuffer) {
+  BlockBuffer block_buffer(10, 3, 1);
+  SpectrumBuffer spectrum_buffer(10, 1);
+  EXPECT_DEATH(RenderBuffer(&block_buffer, &spectrum_buffer, nullptr), "");
+}
+
+// Verifies the check for non-null spectrum buffer.
+TEST(RenderBufferDeathTest, NullExternalSpectrumBuffer) {
+  FftBuffer fft_buffer(10, 1);
+  BlockBuffer block_buffer(10, 3, 1);
+  EXPECT_DEATH(RenderBuffer(&block_buffer, nullptr, &fft_buffer), "");
+}
+
+// Verifies the check for non-null block buffer.
+TEST(RenderBufferDeathTest, NullExternalBlockBuffer) {
+  FftBuffer fft_buffer(10, 1);
+  SpectrumBuffer spectrum_buffer(10, 1);
+  EXPECT_DEATH(RenderBuffer(nullptr, &spectrum_buffer, &fft_buffer), "");
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc
new file mode 100644
index 0000000000..ec5d35507e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -0,0 +1,519 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/render_delay_buffer.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/alignment_mixer.h" +#include "modules/audio_processing/aec3/block_buffer.h" +#include "modules/audio_processing/aec3/decimator.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/fft_buffer.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +bool UpdateCaptureCallCounterOnSkippedBlocks() { + return !field_trial::IsEnabled( + "WebRTC-Aec3RenderBufferCallCounterUpdateKillSwitch"); +} + +class RenderDelayBufferImpl final : public RenderDelayBuffer { + public: + RenderDelayBufferImpl(const EchoCanceller3Config& config, + int sample_rate_hz, + size_t num_render_channels); + RenderDelayBufferImpl() = delete; + ~RenderDelayBufferImpl() override; + + void Reset() override; + BufferingEvent Insert(const Block& block) override; + BufferingEvent PrepareCaptureProcessing() override; + void HandleSkippedCaptureProcessing() override; + bool AlignFromDelay(size_t delay) override; + void AlignFromExternalDelay() override; + size_t Delay() const override { return ComputeDelay(); } + size_t MaxDelay() const override { + return blocks_.buffer.size() - 1 - buffer_headroom_; + } + RenderBuffer* GetRenderBuffer() override { return &echo_remover_buffer_; } + + const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const override { + return low_rate_; + } + + int BufferLatency() const; + void SetAudioBufferDelay(int delay_ms) override; + bool HasReceivedBufferDelay() override; + + private: + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + const Aec3Optimization optimization_; + const EchoCanceller3Config config_; + const bool update_capture_call_counter_on_skipped_blocks_; + const float render_linear_amplitude_gain_; + const rtc::LoggingSeverity delay_log_level_; + size_t down_sampling_factor_; + const int sub_block_size_; + BlockBuffer blocks_; + SpectrumBuffer spectra_; + FftBuffer ffts_; + absl::optional delay_; + RenderBuffer echo_remover_buffer_; + DownsampledRenderBuffer low_rate_; + AlignmentMixer render_mixer_; + Decimator render_decimator_; + const Aec3Fft fft_; + std::vector render_ds_; + const int buffer_headroom_; + bool last_call_was_render_ = false; + int num_api_calls_in_a_row_ = 0; + int max_observed_jitter_ = 1; + int64_t capture_call_counter_ = 0; + int64_t render_call_counter_ = 0; + bool render_activity_ = false; + size_t render_activity_counter_ = 0; + absl::optional external_audio_buffer_delay_; + bool external_audio_buffer_delay_verified_after_reset_ = false; + size_t min_latency_blocks_ = 0; + size_t excess_render_detection_counter_ = 0; + + int MapDelayToTotalDelay(size_t delay) const; + int ComputeDelay() const; + void ApplyTotalDelay(int delay); + void InsertBlock(const Block& block, int previous_write); + bool DetectActiveRender(rtc::ArrayView x) const; + bool 
+  void IncrementWriteIndices();
+  void IncrementLowRateReadIndices();
+  void IncrementReadIndices();
+  bool RenderOverrun();
+  bool RenderUnderrun();
+};
+
+std::atomic<int> RenderDelayBufferImpl::instance_count_ = 0;
+
+RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
+                                             int sample_rate_hz,
+                                             size_t num_render_channels)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      optimization_(DetectOptimization()),
+      config_(config),
+      update_capture_call_counter_on_skipped_blocks_(
+          UpdateCaptureCallCounterOnSkippedBlocks()),
+      render_linear_amplitude_gain_(
+          std::pow(10.0f, config_.render_levels.render_power_gain_db / 20.f)),
+      delay_log_level_(config_.delay.log_warning_on_delay_changes
+                           ? rtc::LS_WARNING
+                           : rtc::LS_VERBOSE),
+      down_sampling_factor_(config.delay.down_sampling_factor),
+      sub_block_size_(static_cast<int>(down_sampling_factor_ > 0
+                                           ? kBlockSize / down_sampling_factor_
+                                           : kBlockSize)),
+      blocks_(GetRenderDelayBufferSize(down_sampling_factor_,
+                                       config.delay.num_filters,
+                                       config.filter.refined.length_blocks),
+              NumBandsForRate(sample_rate_hz),
+              num_render_channels),
+      spectra_(blocks_.buffer.size(), num_render_channels),
+      ffts_(blocks_.buffer.size(), num_render_channels),
+      delay_(config_.delay.default_delay),
+      echo_remover_buffer_(&blocks_, &spectra_, &ffts_),
+      low_rate_(GetDownSampledBufferSize(down_sampling_factor_,
+                                         config.delay.num_filters)),
+      render_mixer_(num_render_channels, config.delay.render_alignment_mixing),
+      render_decimator_(down_sampling_factor_),
+      fft_(),
+      render_ds_(sub_block_size_, 0.f),
+      buffer_headroom_(config.filter.refined.length_blocks) {
+  RTC_DCHECK_EQ(blocks_.buffer.size(), ffts_.buffer.size());
+  RTC_DCHECK_EQ(spectra_.buffer.size(), ffts_.buffer.size());
+  for (size_t i = 0; i < blocks_.buffer.size(); ++i) {
+    RTC_DCHECK_EQ(blocks_.buffer[i].NumChannels(), ffts_.buffer[i].size());
+    RTC_DCHECK_EQ(spectra_.buffer[i].size(), ffts_.buffer[i].size());
+  }
+
+  Reset();
+}
+
+RenderDelayBufferImpl::~RenderDelayBufferImpl() = default;
+
+// Resets the buffer delays and clears the reported delays.
+void RenderDelayBufferImpl::Reset() {
+  last_call_was_render_ = false;
+  num_api_calls_in_a_row_ = 1;
+  min_latency_blocks_ = 0;
+  excess_render_detection_counter_ = 0;
+
+  // Initialize the read index to one sub-block before the write index.
+  low_rate_.read = low_rate_.OffsetIndex(low_rate_.write, sub_block_size_);
+
+  // Check for any external audio buffer delay and whether it is feasible.
+  if (external_audio_buffer_delay_) {
+    const int headroom = 2;
+    size_t audio_buffer_delay_to_set;
+    // Minimum delay is 1 (like the low-rate render buffer).
+    if (*external_audio_buffer_delay_ <= headroom) {
+      audio_buffer_delay_to_set = 1;
+    } else {
+      audio_buffer_delay_to_set = *external_audio_buffer_delay_ - headroom;
+    }
+
+    audio_buffer_delay_to_set = std::min(audio_buffer_delay_to_set, MaxDelay());
+
+    // When an external delay estimate is available, use that delay as the
+    // initial render buffer delay.
+    ApplyTotalDelay(audio_buffer_delay_to_set);
+    delay_ = ComputeDelay();
+
+    external_audio_buffer_delay_verified_after_reset_ = false;
+  } else {
+    // When no external delay estimate is available, fall back to setting the
+    // render buffer delays to the configured default delay.
+    ApplyTotalDelay(config_.delay.default_delay);
+
+    // Unset the delays which are set by AlignFromDelay.
+    delay_ = absl::nullopt;
+  }
+}
+
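+// Usage sketch (illustrative only, not part of the upstream file): a client
+// creates the buffer via the factory and then, per audio block, pairs one
+// render-side Insert() with one capture-side PrepareCaptureProcessing():
+//
+//   std::unique_ptr<RenderDelayBuffer> buffer(
+//       RenderDelayBuffer::Create(EchoCanceller3Config(), 16000, 1));
+//   Block render(NumBandsForRate(16000), 1);
+//   buffer->Insert(render);              // Render API call.
+//   buffer->PrepareCaptureProcessing();  // Capture API call.
+//   RenderBuffer* aligned_render = buffer->GetRenderBuffer();
+//
+// The BufferingEvent return values signal over-/underruns to the caller.
+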
+// Inserts a new block into the render buffers.
+RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert(
+    const Block& block) {
+  ++render_call_counter_;
+  if (delay_) {
+    if (!last_call_was_render_) {
+      last_call_was_render_ = true;
+      num_api_calls_in_a_row_ = 1;
+    } else {
+      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
+        max_observed_jitter_ = num_api_calls_in_a_row_;
+        RTC_LOG_V(delay_log_level_)
+            << "New max number api jitter observed at render block "
+            << render_call_counter_ << ": " << num_api_calls_in_a_row_
+            << " blocks";
+      }
+    }
+  }
+
+  // Increase the write indices to where the new blocks should be written.
+  const int previous_write = blocks_.write;
+  IncrementWriteIndices();
+
+  // Allow overrun and do a reset when a render overrun occurs, i.e., when
+  // more render data is inserted than capture data is received.
+  BufferingEvent event =
+      RenderOverrun() ? BufferingEvent::kRenderOverrun : BufferingEvent::kNone;
+
+  // Detect and update render activity.
+  if (!render_activity_) {
+    render_activity_counter_ +=
+        DetectActiveRender(block.View(/*band=*/0, /*channel=*/0)) ? 1 : 0;
+    render_activity_ = render_activity_counter_ >= 20;
+  }
+
+  // Insert the new render block into the specified position.
+  InsertBlock(block, previous_write);
+
+  if (event != BufferingEvent::kNone) {
+    Reset();
+  }
+
+  return event;
+}
+
+void RenderDelayBufferImpl::HandleSkippedCaptureProcessing() {
+  if (update_capture_call_counter_on_skipped_blocks_) {
+    ++capture_call_counter_;
+  }
+}
+
+// Prepares the render buffers for processing another capture block.
+RenderDelayBuffer::BufferingEvent
+RenderDelayBufferImpl::PrepareCaptureProcessing() {
+  RenderDelayBuffer::BufferingEvent event = BufferingEvent::kNone;
+  ++capture_call_counter_;
+
+  if (delay_) {
+    if (last_call_was_render_) {
+      last_call_was_render_ = false;
+      num_api_calls_in_a_row_ = 1;
+    } else {
+      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
+        max_observed_jitter_ = num_api_calls_in_a_row_;
+        RTC_LOG_V(delay_log_level_)
+            << "New max number api jitter observed at capture block "
+            << capture_call_counter_ << ": " << num_api_calls_in_a_row_
+            << " blocks";
+      }
+    }
+  }
+
+  if (DetectExcessRenderBlocks()) {
+    // Too many render blocks compared to capture blocks. Risk of delay ending
+    // up before the filter used by the delay estimator.
+    RTC_LOG_V(delay_log_level_)
+        << "Excess render blocks detected at block " << capture_call_counter_;
+    Reset();
+    event = BufferingEvent::kRenderOverrun;
+  } else if (RenderUnderrun()) {
+    // Don't increment the read indices of the low rate buffer if there is a
+    // render underrun.
+    RTC_LOG_V(delay_log_level_)
+        << "Render buffer underrun detected at block "
+        << capture_call_counter_;
+    IncrementReadIndices();
+    // Incrementing the buffer index without increasing the low rate buffer
+    // index means that the delay is reduced by one.
+    if (delay_ && *delay_ > 0)
+      delay_ = *delay_ - 1;
+    event = BufferingEvent::kRenderUnderrun;
+  } else {
+    // Increment the read indices in the render buffers to point to the most
+    // recent block to use in the capture processing.
+    IncrementLowRateReadIndices();
+    IncrementReadIndices();
+  }
+
+  echo_remover_buffer_.SetRenderActivity(render_activity_);
+  if (render_activity_) {
+    render_activity_counter_ = 0;
+    render_activity_ = false;
+  }
+
+  return event;
+}
+
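+// Worked example (explanatory note, not in the upstream source): if capture
+// blocks keep arriving while no render blocks are inserted, RenderUnderrun()
+// above fires. The block read indices still advance so processing can
+// continue, but the low-rate read index does not, and the delay estimate
+// shrinks by one block per underrun; a buffer aligned at delay 5 that sees
+// two consecutive underruns continues at delay 3 until it is realigned.
+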
+// Sets the delay and returns a bool indicating whether the delay was changed.
+bool RenderDelayBufferImpl::AlignFromDelay(size_t delay) {
+  RTC_DCHECK(!config_.delay.use_external_delay_estimator);
+  if (!external_audio_buffer_delay_verified_after_reset_ &&
+      external_audio_buffer_delay_ && delay_) {
+    int difference = static_cast<int>(delay) - static_cast<int>(*delay_);
+    RTC_LOG_V(delay_log_level_)
+        << "Mismatch between first estimated delay after reset "
+           "and externally reported audio buffer delay: "
+        << difference << " blocks";
+    external_audio_buffer_delay_verified_after_reset_ = true;
+  }
+  if (delay_ && *delay_ == delay) {
+    return false;
+  }
+  delay_ = delay;
+
+  // Compute the total delay and limit the delay to the allowed range.
+  int total_delay = MapDelayToTotalDelay(*delay_);
+  total_delay =
+      std::min(MaxDelay(), static_cast<size_t>(std::max(total_delay, 0)));
+
+  // Apply the delay to the buffers.
+  ApplyTotalDelay(total_delay);
+  return true;
+}
+
+void RenderDelayBufferImpl::SetAudioBufferDelay(int delay_ms) {
+  if (!external_audio_buffer_delay_) {
+    RTC_LOG_V(delay_log_level_)
+        << "Receiving a first externally reported audio buffer delay of "
+        << delay_ms << " ms.";
+  }
+
+  // Convert delay from milliseconds to blocks (rounded down); one block is
+  // 4 ms long.
+  external_audio_buffer_delay_ = delay_ms / 4;
+}
+
+bool RenderDelayBufferImpl::HasReceivedBufferDelay() {
+  return external_audio_buffer_delay_.has_value();
+}
+
+// Maps the externally computed delay to the delay used internally.
+int RenderDelayBufferImpl::MapDelayToTotalDelay(
+    size_t external_delay_blocks) const {
+  const int latency_blocks = BufferLatency();
+  return latency_blocks + static_cast<int>(external_delay_blocks);
+}
+
+// Returns the delay (not including call jitter).
+int RenderDelayBufferImpl::ComputeDelay() const {
+  const int latency_blocks = BufferLatency();
+  int internal_delay = spectra_.read >= spectra_.write
+                           ? spectra_.read - spectra_.write
+                           : spectra_.size + spectra_.read - spectra_.write;
+
+  return internal_delay - latency_blocks;
+}
+
+// Sets the read indices according to the delay.
+void RenderDelayBufferImpl::ApplyTotalDelay(int delay) {
+  RTC_LOG_V(delay_log_level_)
+      << "Applying total delay of " << delay << " blocks.";
+  blocks_.read = blocks_.OffsetIndex(blocks_.write, -delay);
+  spectra_.read = spectra_.OffsetIndex(spectra_.write, delay);
+  ffts_.read = ffts_.OffsetIndex(ffts_.write, delay);
+}
+
+void RenderDelayBufferImpl::AlignFromExternalDelay() {
+  RTC_DCHECK(config_.delay.use_external_delay_estimator);
+  if (external_audio_buffer_delay_) {
+    const int64_t delay = render_call_counter_ - capture_call_counter_ +
+                          *external_audio_buffer_delay_;
+    const int64_t delay_with_headroom =
+        delay - config_.delay.delay_headroom_samples / kBlockSize;
+    ApplyTotalDelay(delay_with_headroom);
+  }
+}
+
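+// Worked example (explanatory note, not in the upstream source): with
+// kBlockSize = 64, an externally reported delay of 5 blocks and equal render
+// and capture call counters, delay_headroom_samples = 32 gives
+// delay_with_headroom = 5 - 32 / 64 = 5 blocks (integer division), while
+// delay_headroom_samples = 64 gives 4 blocks before ApplyTotalDelay() is
+// called. The headroom values in this example are hypothetical.
+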
+// Inserts a block into the render buffers.
+void RenderDelayBufferImpl::InsertBlock(const Block& block,
+                                        int previous_write) {
+  auto& b = blocks_;
+  auto& lr = low_rate_;
+  auto& ds = render_ds_;
+  auto& f = ffts_;
+  auto& s = spectra_;
+  const size_t num_bands = b.buffer[b.write].NumBands();
+  const size_t num_render_channels = b.buffer[b.write].NumChannels();
+  RTC_DCHECK_EQ(block.NumBands(), num_bands);
+  RTC_DCHECK_EQ(block.NumChannels(), num_render_channels);
+  for (size_t band = 0; band < num_bands; ++band) {
+    for (size_t ch = 0; ch < num_render_channels; ++ch) {
+      std::copy(block.begin(band, ch), block.end(band, ch),
+                b.buffer[b.write].begin(band, ch));
+    }
+  }
+
+  if (render_linear_amplitude_gain_ != 1.f) {
+    for (size_t band = 0; band < num_bands; ++band) {
+      for (size_t ch = 0; ch < num_render_channels; ++ch) {
+        rtc::ArrayView<float> b_view = b.buffer[b.write].View(band, ch);
+        for (float& sample : b_view) {
+          sample *= render_linear_amplitude_gain_;
+        }
+      }
+    }
+  }
+
+  std::array<float, kBlockSize> downmixed_render;
+  render_mixer_.ProduceOutput(b.buffer[b.write], downmixed_render);
+  render_decimator_.Decimate(downmixed_render, ds);
+  data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(),
+                        16000 / down_sampling_factor_, 1);
+  std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write);
+  for (int channel = 0; channel < b.buffer[b.write].NumChannels(); ++channel) {
+    fft_.PaddedFft(b.buffer[b.write].View(/*band=*/0, channel),
+                   b.buffer[previous_write].View(/*band=*/0, channel),
+                   &f.buffer[f.write][channel]);
+    f.buffer[f.write][channel].Spectrum(optimization_,
+                                        s.buffer[s.write][channel]);
+  }
+}
+
+bool RenderDelayBufferImpl::DetectActiveRender(
+    rtc::ArrayView<const float> x) const {
+  const float x_energy =
+      std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
+  return x_energy > (config_.render_levels.active_render_limit *
+                     config_.render_levels.active_render_limit) *
+                        kFftLengthBy2;
+}
+
+bool RenderDelayBufferImpl::DetectExcessRenderBlocks() {
+  bool excess_render_detected = false;
+  const size_t latency_blocks = static_cast<size_t>(BufferLatency());
+  // The recently seen minimum latency in blocks. Should be close to 0.
+  min_latency_blocks_ = std::min(min_latency_blocks_, latency_blocks);
+  // After processing a configurable number of blocks the minimum latency is
+  // checked.
+  if (++excess_render_detection_counter_ >=
+      config_.buffering.excess_render_detection_interval_blocks) {
+    // If the minimum latency is not lower than the threshold there have been
+    // more render than capture frames.
+    excess_render_detected =
+        min_latency_blocks_ >
+        config_.buffering.max_allowed_excess_render_blocks;
+    // Reset the counter and let the minimum latency be the current latency.
+    min_latency_blocks_ = latency_blocks;
+    excess_render_detection_counter_ = 0;
+  }
+
+  data_dumper_->DumpRaw("aec3_latency_blocks", latency_blocks);
+  data_dumper_->DumpRaw("aec3_min_latency_blocks", min_latency_blocks_);
+  data_dumper_->DumpRaw("aec3_excess_render_detected", excess_render_detected);
+  return excess_render_detected;
+}
+
+// Computes the latency in the buffer (the number of unread sub-blocks).
+int RenderDelayBufferImpl::BufferLatency() const {
+  const DownsampledRenderBuffer& l = low_rate_;
+  int latency_samples = (l.buffer.size() + l.read - l.write) % l.buffer.size();
+  int latency_blocks = latency_samples / sub_block_size_;
+  return latency_blocks;
+}
+
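+// Note on index conventions (explanatory note, not in the upstream source):
+// low_rate_ and blocks_ advance their write positions in one direction while
+// spectra_ and ffts_ store the newest data at decreasing indices. This is why
+// the helpers below mix UpdateWriteIndex(-sub_block_size_), IncWriteIndex()
+// and DecWriteIndex(), and why ApplyTotalDelay() offsets blocks_ by -delay
+// but spectra_ and ffts_ by +delay.
+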
+// Increments the write indices for the render buffers.
+void RenderDelayBufferImpl::IncrementWriteIndices() {
+  low_rate_.UpdateWriteIndex(-sub_block_size_);
+  blocks_.IncWriteIndex();
+  spectra_.DecWriteIndex();
+  ffts_.DecWriteIndex();
+}
+
+// Increments the read indices of the low rate render buffers.
+void RenderDelayBufferImpl::IncrementLowRateReadIndices() {
+  low_rate_.UpdateReadIndex(-sub_block_size_);
+}
+
+// Increments the read indices for the render buffers.
+void RenderDelayBufferImpl::IncrementReadIndices() {
+  if (blocks_.read != blocks_.write) {
+    blocks_.IncReadIndex();
+    spectra_.DecReadIndex();
+    ffts_.DecReadIndex();
+  }
+}
+
+// Checks for a render buffer overrun.
+bool RenderDelayBufferImpl::RenderOverrun() {
+  return low_rate_.read == low_rate_.write || blocks_.read == blocks_.write;
+}
+
+// Checks for a render buffer underrun.
+bool RenderDelayBufferImpl::RenderUnderrun() {
+  return low_rate_.read == low_rate_.write;
+}
+
+}  // namespace
+
+RenderDelayBuffer* RenderDelayBuffer::Create(const EchoCanceller3Config& config,
+                                             int sample_rate_hz,
+                                             size_t num_render_channels) {
+  return new RenderDelayBufferImpl(config, sample_rate_hz,
+                                   num_render_channels);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.h
new file mode 100644
index 0000000000..6dc1aefb85
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/block.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+
+namespace webrtc {
+
+// Class for buffering the incoming render blocks such that these may be
+// extracted with a specified delay.
+class RenderDelayBuffer {
+ public:
+  enum class BufferingEvent {
+    kNone,
+    kRenderUnderrun,
+    kRenderOverrun,
+    kApiCallSkew
+  };
+
+  static RenderDelayBuffer* Create(const EchoCanceller3Config& config,
+                                   int sample_rate_hz,
+                                   size_t num_render_channels);
+  virtual ~RenderDelayBuffer() = default;
+
+  // Resets the buffer alignment.
+  virtual void Reset() = 0;
+
+  // Inserts a block into the buffer.
+  virtual BufferingEvent Insert(const Block& block) = 0;
+
+  // Updates the buffers one step based on the specified buffer delay. Returns
+  // an enum indicating whether there was a special event that occurred.
+  virtual BufferingEvent PrepareCaptureProcessing() = 0;
+
+  // Called on capture blocks where PrepareCaptureProcessing is not called.
+  virtual void HandleSkippedCaptureProcessing() = 0;
+
+  // Sets the buffer delay and returns a bool indicating whether the delay
+  // changed.
+  virtual bool AlignFromDelay(size_t delay) = 0;
+
+  // Sets the buffer delay from the most recently reported external delay.
+  virtual void AlignFromExternalDelay() = 0;
+
+  // Gets the buffer delay.
+  virtual size_t Delay() const = 0;
+
+  // Gets the maximum buffer delay.
+  virtual size_t MaxDelay() const = 0;
+
+  // Returns the render buffer for the echo remover.
+  virtual RenderBuffer* GetRenderBuffer() = 0;
+
+  // Returns the downsampled render buffer.
+  virtual const DownsampledRenderBuffer& GetDownsampledRenderBuffer()
+      const = 0;
+
+  // Returns the maximum non-causal offset that can occur in the delay buffer.
+  static int DelayEstimatorOffset(const EchoCanceller3Config& config);
+
+  // Provides an optional external estimate of the audio buffer delay.
+  virtual void SetAudioBufferDelay(int delay_ms) = 0;
+
+  // Returns whether an external delay estimate has been reported via
+  // SetAudioBufferDelay.
+  virtual bool HasReceivedBufferDelay() = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
new file mode 100644
index 0000000000..d51e06a1ac
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+std::string ProduceDebugText(int sample_rate_hz) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz;
+  return ss.Release();
+}
+
+}  // namespace
+
+// Verifies that the buffer overflow is correctly reported.
+TEST(RenderDelayBuffer, BufferOverflow) {
+  const EchoCanceller3Config config;
+  for (auto num_channels : {1, 2, 8}) {
+    for (auto rate : {16000, 32000, 48000}) {
+      SCOPED_TRACE(ProduceDebugText(rate));
+      std::unique_ptr<RenderDelayBuffer> delay_buffer(
+          RenderDelayBuffer::Create(config, rate, num_channels));
+      Block block_to_insert(NumBandsForRate(rate), num_channels);
+      for (size_t k = 0; k < 10; ++k) {
+        EXPECT_EQ(RenderDelayBuffer::BufferingEvent::kNone,
+                  delay_buffer->Insert(block_to_insert));
+      }
+      bool overrun_occurred = false;
+      for (size_t k = 0; k < 1000; ++k) {
+        RenderDelayBuffer::BufferingEvent event =
+            delay_buffer->Insert(block_to_insert);
+        overrun_occurred =
+            overrun_occurred ||
+            RenderDelayBuffer::BufferingEvent::kRenderOverrun == event;
+      }
+
+      EXPECT_TRUE(overrun_occurred);
+    }
+  }
+}
+
+// Verifies that the check for available block works.
+TEST(RenderDelayBuffer, AvailableBlock) {
+  constexpr size_t kNumChannels = 1;
+  constexpr int kSampleRateHz = 48000;
+  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
+  std::unique_ptr<RenderDelayBuffer> delay_buffer(RenderDelayBuffer::Create(
+      EchoCanceller3Config(), kSampleRateHz, kNumChannels));
+  Block input_block(kNumBands, kNumChannels, 1.0f);
+  EXPECT_EQ(RenderDelayBuffer::BufferingEvent::kNone,
+            delay_buffer->Insert(input_block));
+  delay_buffer->PrepareCaptureProcessing();
+}
+
+// Verifies the AlignFromDelay method.
+TEST(RenderDelayBuffer, AlignFromDelay) {
+  EchoCanceller3Config config;
+  std::unique_ptr<RenderDelayBuffer> delay_buffer(
+      RenderDelayBuffer::Create(config, 16000, 1));
+  ASSERT_TRUE(delay_buffer->Delay());
+  delay_buffer->Reset();
+  size_t initial_internal_delay = 0;
+  for (size_t delay = initial_internal_delay;
+       delay < initial_internal_delay + 20; ++delay) {
+    ASSERT_TRUE(delay_buffer->AlignFromDelay(delay));
+    EXPECT_EQ(delay, delay_buffer->Delay());
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for feasible delay.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(RenderDelayBufferDeathTest, DISABLED_WrongDelay) {
+  std::unique_ptr<RenderDelayBuffer> delay_buffer(
+      RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, 1));
+  EXPECT_DEATH(delay_buffer->AlignFromDelay(21), "");
+}
+
+// Verifies the check for the number of bands in the inserted blocks.
+TEST(RenderDelayBufferDeathTest, WrongNumberOfBands) {
+  for (auto rate : {16000, 32000, 48000}) {
+    for (size_t num_channels : {1, 2, 8}) {
+      SCOPED_TRACE(ProduceDebugText(rate));
+      std::unique_ptr<RenderDelayBuffer> delay_buffer(
+          RenderDelayBuffer::Create(EchoCanceller3Config(), rate,
+                                    num_channels));
+      Block block_to_insert(
+          NumBandsForRate(rate < 48000 ? rate + 16000 : 16000), num_channels);
+      EXPECT_DEATH(delay_buffer->Insert(block_to_insert), "");
+    }
+  }
+}
+
+// Verifies the check for the number of channels in the inserted blocks.
+TEST(RenderDelayBufferDeathTest, WrongNumberOfChannels) {
+  for (auto rate : {16000, 32000, 48000}) {
+    for (size_t num_channels : {1, 2, 8}) {
+      SCOPED_TRACE(ProduceDebugText(rate));
+      std::unique_ptr<RenderDelayBuffer> delay_buffer(
+          RenderDelayBuffer::Create(EchoCanceller3Config(), rate,
+                                    num_channels));
+      Block block_to_insert(NumBandsForRate(rate), num_channels + 1);
+      EXPECT_DEATH(delay_buffer->Insert(block_to_insert), "");
+    }
+  }
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc
new file mode 100644
index 0000000000..465e77fb7c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.cc
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/aec3/render_delay_controller.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <atomic>
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
+#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+class RenderDelayControllerImpl final : public RenderDelayController {
+ public:
+  RenderDelayControllerImpl(const EchoCanceller3Config& config,
+                            int sample_rate_hz,
+                            size_t num_capture_channels);
+
+  RenderDelayControllerImpl() = delete;
+  RenderDelayControllerImpl(const RenderDelayControllerImpl&) = delete;
+  RenderDelayControllerImpl& operator=(const RenderDelayControllerImpl&) =
+      delete;
+
+  ~RenderDelayControllerImpl() override;
+  void Reset(bool reset_delay_confidence) override;
+  void LogRenderCall() override;
+  absl::optional<DelayEstimate> GetDelay(
+      const DownsampledRenderBuffer& render_buffer,
+      size_t render_delay_buffer_delay,
+      const Block& capture) override;
+  bool HasClockdrift() const override;
+
+ private:
+  static std::atomic<int> instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const int hysteresis_limit_blocks_;
+  absl::optional<DelayEstimate> delay_;
+  EchoPathDelayEstimator delay_estimator_;
+  RenderDelayControllerMetrics metrics_;
+  absl::optional<DelayEstimate> delay_samples_;
+  size_t capture_call_counter_ = 0;
+  int delay_change_counter_ = 0;
+  DelayEstimate::Quality last_delay_estimate_quality_;
+};
+
+DelayEstimate ComputeBufferDelay(
+    const absl::optional<DelayEstimate>& current_delay,
+    int hysteresis_limit_blocks,
+    DelayEstimate estimated_delay) {
+  // Compute the buffer delay increase required to achieve the desired latency.
+  size_t new_delay_blocks = estimated_delay.delay >> kBlockSizeLog2;
+  // Add hysteresis.
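+  // Worked example (explanatory note, not in the upstream source): with
+  // hysteresis_limit_blocks = 1, a current delay of 10 blocks and a new
+  // estimate of 11 blocks keeps the delay at 10, whereas an estimate of 12
+  // blocks, or any decrease, is applied unchanged.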
+  if (current_delay) {
+    size_t current_delay_blocks = current_delay->delay;
+    if (new_delay_blocks > current_delay_blocks &&
+        new_delay_blocks <= current_delay_blocks + hysteresis_limit_blocks) {
+      new_delay_blocks = current_delay_blocks;
+    }
+  }
+  DelayEstimate new_delay = estimated_delay;
+  new_delay.delay = new_delay_blocks;
+  return new_delay;
+}
+
+std::atomic<int> RenderDelayControllerImpl::instance_count_(0);
+
+RenderDelayControllerImpl::RenderDelayControllerImpl(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz,
+    size_t num_capture_channels)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      hysteresis_limit_blocks_(
+          static_cast<int>(config.delay.hysteresis_limit_blocks)),
+      delay_estimator_(data_dumper_.get(), config, num_capture_channels),
+      last_delay_estimate_quality_(DelayEstimate::Quality::kCoarse) {
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
+  delay_estimator_.LogDelayEstimationProperties(sample_rate_hz, 0);
+}
+
+RenderDelayControllerImpl::~RenderDelayControllerImpl() = default;
+
+void RenderDelayControllerImpl::Reset(bool reset_delay_confidence) {
+  delay_ = absl::nullopt;
+  delay_samples_ = absl::nullopt;
+  delay_estimator_.Reset(reset_delay_confidence);
+  delay_change_counter_ = 0;
+  if (reset_delay_confidence) {
+    last_delay_estimate_quality_ = DelayEstimate::Quality::kCoarse;
+  }
+}
+
+void RenderDelayControllerImpl::LogRenderCall() {}
+
+absl::optional<DelayEstimate> RenderDelayControllerImpl::GetDelay(
+    const DownsampledRenderBuffer& render_buffer,
+    size_t render_delay_buffer_delay,
+    const Block& capture) {
+  ++capture_call_counter_;
+
+  auto delay_samples = delay_estimator_.EstimateDelay(render_buffer, capture);
+
+  if (delay_samples) {
+    if (!delay_samples_ || delay_samples->delay != delay_samples_->delay) {
+      delay_change_counter_ = 0;
+    }
+    if (delay_samples_) {
+      delay_samples_->blocks_since_last_change =
+          delay_samples_->delay == delay_samples->delay
+              ? delay_samples_->blocks_since_last_change + 1
+              : 0;
+      delay_samples_->blocks_since_last_update = 0;
+      delay_samples_->delay = delay_samples->delay;
+      delay_samples_->quality = delay_samples->quality;
+    } else {
+      delay_samples_ = delay_samples;
+    }
+  } else {
+    if (delay_samples_) {
+      ++delay_samples_->blocks_since_last_change;
+      ++delay_samples_->blocks_since_last_update;
+    }
+  }
+
+  if (delay_change_counter_ < 2 * kNumBlocksPerSecond) {
+    ++delay_change_counter_;
+  }
+
+  if (delay_samples_) {
+    // Compute the render delay buffer delay.
+    const bool use_hysteresis =
+        last_delay_estimate_quality_ == DelayEstimate::Quality::kRefined &&
+        delay_samples_->quality == DelayEstimate::Quality::kRefined;
+    delay_ = ComputeBufferDelay(
+        delay_, use_hysteresis ? hysteresis_limit_blocks_ : 0,
+        *delay_samples_);
+    last_delay_estimate_quality_ = delay_samples_->quality;
+  }
+
+  metrics_.Update(
+      delay_samples_ ? absl::optional<size_t>(delay_samples_->delay)
+                     : absl::nullopt,
+      delay_ ? absl::optional<size_t>(delay_->delay) : absl::nullopt,
+      delay_estimator_.Clockdrift());
+
+  data_dumper_->DumpRaw("aec3_render_delay_controller_delay",
+                        delay_samples ? delay_samples->delay : 0);
+  data_dumper_->DumpRaw("aec3_render_delay_controller_buffer_delay",
+                        delay_ ? delay_->delay : 0);
+
+  return delay_;
+}
+
+bool RenderDelayControllerImpl::HasClockdrift() const {
+  return delay_estimator_.Clockdrift() != ClockdriftDetector::Level::kNone;
+}
+
+}  // namespace
+
+RenderDelayController* RenderDelayController::Create(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz,
+    size_t num_capture_channels) {
+  return new RenderDelayControllerImpl(config, sample_rate_hz,
+                                       num_capture_channels);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.h
new file mode 100644
index 0000000000..4a18a11e36
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/block.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+
+// Class for aligning the render and capture signal using a RenderDelayBuffer.
+class RenderDelayController {
+ public:
+  static RenderDelayController* Create(const EchoCanceller3Config& config,
+                                       int sample_rate_hz,
+                                       size_t num_capture_channels);
+  virtual ~RenderDelayController() = default;
+
+  // Resets the delay controller. If the delay confidence is reset, the reset
+  // behavior is as if the call is restarted.
+  virtual void Reset(bool reset_delay_confidence) = 0;
+
+  // Logs a render call.
+  virtual void LogRenderCall() = 0;
+
+  // Aligns the render buffer content with the capture signal.
+  virtual absl::optional<DelayEstimate> GetDelay(
+      const DownsampledRenderBuffer& render_buffer,
+      size_t render_delay_buffer_delay,
+      const Block& capture) = 0;
+
+  // Returns true if clockdrift has been detected.
+  virtual bool HasClockdrift() const = 0;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc
new file mode 100644
index 0000000000..1e0a0f443e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+
+enum class DelayReliabilityCategory {
+  kNone,
+  kPoor,
+  kMedium,
+  kGood,
+  kExcellent,
+  kNumCategories
+};
+enum class DelayChangesCategory {
+  kNone,
+  kFew,
+  kSeveral,
+  kMany,
+  kConstant,
+  kNumCategories
+};
+
+}  // namespace
+
+RenderDelayControllerMetrics::RenderDelayControllerMetrics() = default;
+
+void RenderDelayControllerMetrics::Update(
+    absl::optional<size_t> delay_samples,
+    absl::optional<size_t> buffer_delay_blocks,
+    ClockdriftDetector::Level clockdrift) {
+  ++call_counter_;
+
+  if (!initial_update) {
+    size_t delay_blocks;
+    if (delay_samples) {
+      ++reliable_delay_estimate_counter_;
+      // Add an offset of 2 (which the halving before reporting turns into 1)
+      // to reserve 0 for absent delay.
+      delay_blocks = (*delay_samples) / kBlockSize + 2;
+    } else {
+      delay_blocks = 0;
+    }
+
+    if (delay_blocks != delay_blocks_) {
+      ++delay_change_counter_;
+      delay_blocks_ = delay_blocks;
+    }
+
+  } else if (++initial_call_counter_ == 5 * kNumBlocksPerSecond) {
+    initial_update = false;
+  }
+
+  if (call_counter_ == kMetricsReportingIntervalBlocks) {
+    int value_to_report = static_cast<int>(delay_blocks_);
+    // Divide by 2 to compress metric range.
+    value_to_report = std::min(124, value_to_report >> 1);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.EchoPathDelay",
+                                value_to_report, 0, 124, 125);
+
+    // Divide by 2 to compress metric range.
+    // Offset by 1 to reserve 0 for absent delay.
+    value_to_report =
+        buffer_delay_blocks ? (*buffer_delay_blocks + 2) >> 1 : 0;
+    value_to_report = std::min(124, value_to_report);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.BufferDelay",
+                                value_to_report, 0, 124, 125);
+
+    DelayReliabilityCategory delay_reliability;
+    if (reliable_delay_estimate_counter_ == 0) {
+      delay_reliability = DelayReliabilityCategory::kNone;
+    } else if (reliable_delay_estimate_counter_ > (call_counter_ >> 1)) {
+      delay_reliability = DelayReliabilityCategory::kExcellent;
+    } else if (reliable_delay_estimate_counter_ > 100) {
+      delay_reliability = DelayReliabilityCategory::kGood;
+    } else if (reliable_delay_estimate_counter_ > 10) {
+      delay_reliability = DelayReliabilityCategory::kMedium;
+    } else {
+      delay_reliability = DelayReliabilityCategory::kPoor;
+    }
+    RTC_HISTOGRAM_ENUMERATION(
+        "WebRTC.Audio.EchoCanceller.ReliableDelayEstimates",
+        static_cast<int>(delay_reliability),
+        static_cast<int>(DelayReliabilityCategory::kNumCategories));
+
+    DelayChangesCategory delay_changes;
+    if (delay_change_counter_ == 0) {
+      delay_changes = DelayChangesCategory::kNone;
+    } else if (delay_change_counter_ > 10) {
+      delay_changes = DelayChangesCategory::kConstant;
+    } else if (delay_change_counter_ > 5) {
+      delay_changes = DelayChangesCategory::kMany;
+    } else if (delay_change_counter_ > 2) {
+      delay_changes = DelayChangesCategory::kSeveral;
+    } else {
+      delay_changes = DelayChangesCategory::kFew;
+    }
+    RTC_HISTOGRAM_ENUMERATION(
+        "WebRTC.Audio.EchoCanceller.DelayChanges",
+        static_cast<int>(delay_changes),
+        static_cast<int>(DelayChangesCategory::kNumCategories));
+
+    RTC_HISTOGRAM_ENUMERATION(
+        "WebRTC.Audio.EchoCanceller.Clockdrift", static_cast<int>(clockdrift),
+        static_cast<int>(ClockdriftDetector::Level::kNumCategories));
+
+    call_counter_ = 0;
+    ResetMetrics();
+  }
+}
+
+void RenderDelayControllerMetrics::ResetMetrics() {
+  delay_change_counter_ = 0;
+  reliable_delay_estimate_counter_ = 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h
new file mode 100644
index 0000000000..b81833b43f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_
+
+#include <stddef.h>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/aec3/clockdrift_detector.h"
+
+namespace webrtc {
+
+// Handles the reporting of metrics for the render delay controller.
+class RenderDelayControllerMetrics {
+ public:
+  RenderDelayControllerMetrics();
+
+  RenderDelayControllerMetrics(const RenderDelayControllerMetrics&) = delete;
+  RenderDelayControllerMetrics& operator=(
+      const RenderDelayControllerMetrics&) = delete;
+
+  // Updates the metric with new data.
+  void Update(absl::optional<size_t> delay_samples,
+              absl::optional<size_t> buffer_delay_blocks,
+              ClockdriftDetector::Level clockdrift);
+
+ private:
+  // Resets the metrics.
+  void ResetMetrics();
+
+  size_t delay_blocks_ = 0;
+  int reliable_delay_estimate_counter_ = 0;
+  int delay_change_counter_ = 0;
+  int call_counter_ = 0;
+  int initial_call_counter_ = 0;
+  bool initial_update = true;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc
new file mode 100644
index 0000000000..cf9df6b297
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "system_wrappers/include/metrics.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
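+// Explanatory note (not in the upstream source): metrics are flushed every
+// kMetricsReportingIntervalBlocks calls to Update(), i.e. every 10 seconds of
+// 4 ms blocks if that constant in aec3_common.h equals
+// 10 * kNumBlocksPerSecond. The test below therefore stops one call short of
+// the interval, verifies that nothing has been reported yet, and then checks
+// that the next Update() emits exactly one sample per histogram.
+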
+// Verify the general functionality of RenderDelayControllerMetrics.
+TEST(RenderDelayControllerMetrics, NormalUsage) {
+  metrics::Reset();
+
+  RenderDelayControllerMetrics metrics;
+
+  int expected_num_metric_reports = 0;
+
+  for (int j = 0; j < 3; ++j) {
+    for (int k = 0; k < kMetricsReportingIntervalBlocks - 1; ++k) {
+      metrics.Update(absl::nullopt, absl::nullopt,
+                     ClockdriftDetector::Level::kNone);
+    }
+    EXPECT_METRIC_EQ(
+        metrics::NumSamples("WebRTC.Audio.EchoCanceller.EchoPathDelay"),
+        expected_num_metric_reports);
+    EXPECT_METRIC_EQ(
+        metrics::NumSamples("WebRTC.Audio.EchoCanceller.BufferDelay"),
+        expected_num_metric_reports);
+    EXPECT_METRIC_EQ(metrics::NumSamples(
+                         "WebRTC.Audio.EchoCanceller.ReliableDelayEstimates"),
+                     expected_num_metric_reports);
+    EXPECT_METRIC_EQ(
+        metrics::NumSamples("WebRTC.Audio.EchoCanceller.DelayChanges"),
+        expected_num_metric_reports);
+    EXPECT_METRIC_EQ(
+        metrics::NumSamples("WebRTC.Audio.EchoCanceller.Clockdrift"),
+        expected_num_metric_reports);
+
+    // We expect metric reports every kMetricsReportingIntervalBlocks blocks.
+    ++expected_num_metric_reports;
+
+    metrics.Update(absl::nullopt, absl::nullopt,
+                   ClockdriftDetector::Level::kNone);
+    EXPECT_METRIC_EQ(
+        metrics::NumSamples("WebRTC.Audio.EchoCanceller.EchoPathDelay"),
+        expected_num_metric_reports);
+    EXPECT_METRIC_EQ(
+        metrics::NumSamples("WebRTC.Audio.EchoCanceller.BufferDelay"),
+        expected_num_metric_reports);
+    EXPECT_METRIC_EQ(metrics::NumSamples(
+                         "WebRTC.Audio.EchoCanceller.ReliableDelayEstimates"),
+                     expected_num_metric_reports);
+    EXPECT_METRIC_EQ(
+        metrics::NumSamples("WebRTC.Audio.EchoCanceller.DelayChanges"),
+        expected_num_metric_reports);
+    EXPECT_METRIC_EQ(
+        metrics::NumSamples("WebRTC.Audio.EchoCanceller.Clockdrift"),
+        expected_num_metric_reports);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc
new file mode 100644
index 0000000000..e1a54fca9e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_delay_controller.h"
+
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block_processor.h"
+#include "modules/audio_processing/aec3/decimator.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+std::string ProduceDebugText(int sample_rate_hz) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz;
+  return ss.Release();
+}
+
+std::string ProduceDebugText(int sample_rate_hz,
+                             size_t delay,
+                             size_t num_render_channels,
+                             size_t num_capture_channels) {
+  rtc::StringBuilder ss;
+  ss << ProduceDebugText(sample_rate_hz) << ", Delay: " << delay
+     << ", Num render channels: " << num_render_channels
+     << ", Num capture channels: " << num_capture_channels;
+  return ss.Release();
+}
+
+constexpr size_t kDownSamplingFactors[] = {2, 4, 8};
+
+}  // namespace
+
+// Verifies the output of GetDelay when there are no AnalyzeRender calls.
+// TODO(bugs.webrtc.org/11161): Re-enable tests.
+TEST(RenderDelayController, DISABLED_NoRenderSignal) {
+  for (size_t num_render_channels : {1, 2, 8}) {
+    Block block(/*num_bands=*/1, /*num_channels=*/1);
+    EchoCanceller3Config config;
+    for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+         num_matched_filters++) {
+      for (auto down_sampling_factor : kDownSamplingFactors) {
+        config.delay.down_sampling_factor = down_sampling_factor;
+        config.delay.num_filters = num_matched_filters;
+        for (auto rate : {16000, 32000, 48000}) {
+          SCOPED_TRACE(ProduceDebugText(rate));
+          std::unique_ptr<RenderDelayBuffer> delay_buffer(
+              RenderDelayBuffer::Create(config, rate, num_render_channels));
+          std::unique_ptr<RenderDelayController> delay_controller(
+              RenderDelayController::Create(config, rate,
+                                            /*num_capture_channels=*/1));
+          for (size_t k = 0; k < 100; ++k) {
+            auto delay = delay_controller->GetDelay(
+                delay_buffer->GetDownsampledRenderBuffer(),
+                delay_buffer->Delay(), block);
+            EXPECT_FALSE(delay->delay);
+          }
+        }
+      }
+    }
+  }
+}
+
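+// Illustrative helper (not part of the upstream file, unused by the tests):
+// one aligned render/capture step in the order the tests below exercise. The
+// calls are the real AEC3 APIs; the helper itself is hypothetical.
+[[maybe_unused]] static void ExampleAlignedStep(
+    RenderDelayBuffer& render_delay_buffer,
+    RenderDelayController& delay_controller,
+    const Block& render_block,
+    const Block& capture_block) {
+  render_delay_buffer.Insert(render_block);
+  render_delay_buffer.PrepareCaptureProcessing();
+  absl::optional<DelayEstimate> delay = delay_controller.GetDelay(
+      render_delay_buffer.GetDownsampledRenderBuffer(),
+      render_delay_buffer.Delay(), capture_block);
+  if (delay) {
+    render_delay_buffer.AlignFromDelay(delay->delay);
+  }
+}
+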
+// Verifies the basic API call sequence.
+// TODO(bugs.webrtc.org/11161): Re-enable tests.
+TEST(RenderDelayController, DISABLED_BasicApiCalls) {
+  for (size_t num_capture_channels : {1, 2, 4}) {
+    for (size_t num_render_channels : {1, 2, 8}) {
+      Block capture_block(/*num_bands=*/1, num_capture_channels);
+      absl::optional<DelayEstimate> delay_blocks;
+      for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+           num_matched_filters++) {
+        for (auto down_sampling_factor : kDownSamplingFactors) {
+          EchoCanceller3Config config;
+          config.delay.down_sampling_factor = down_sampling_factor;
+          config.delay.num_filters = num_matched_filters;
+          config.delay.capture_alignment_mixing.downmix = false;
+          config.delay.capture_alignment_mixing.adaptive_selection = false;
+
+          for (auto rate : {16000, 32000, 48000}) {
+            Block render_block(NumBandsForRate(rate), num_render_channels);
+            std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+                RenderDelayBuffer::Create(config, rate, num_render_channels));
+            std::unique_ptr<RenderDelayController> delay_controller(
+                RenderDelayController::Create(EchoCanceller3Config(), rate,
+                                              num_capture_channels));
+            for (size_t k = 0; k < 10; ++k) {
+              render_delay_buffer->Insert(render_block);
+              render_delay_buffer->PrepareCaptureProcessing();
+
+              delay_blocks = delay_controller->GetDelay(
+                  render_delay_buffer->GetDownsampledRenderBuffer(),
+                  render_delay_buffer->Delay(), capture_block);
+            }
+            EXPECT_TRUE(delay_blocks);
+            EXPECT_FALSE(delay_blocks->delay);
+          }
+        }
+      }
+    }
+  }
+}
+
+// Verifies that the RenderDelayController is able to align the signals for
+// simple timeshifts between the signals.
+// TODO(bugs.webrtc.org/11161): Re-enable tests.
+TEST(RenderDelayController, DISABLED_Alignment) {
+  Random random_generator(42U);
+  for (size_t num_capture_channels : {1, 2, 4}) {
+    Block capture_block(/*num_bands=*/1, num_capture_channels);
+    for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+         num_matched_filters++) {
+      for (auto down_sampling_factor : kDownSamplingFactors) {
+        EchoCanceller3Config config;
+        config.delay.down_sampling_factor = down_sampling_factor;
+        config.delay.num_filters = num_matched_filters;
+        config.delay.capture_alignment_mixing.downmix = false;
+        config.delay.capture_alignment_mixing.adaptive_selection = false;
+
+        for (size_t num_render_channels : {1, 2, 8}) {
+          for (auto rate : {16000, 32000, 48000}) {
+            Block render_block(NumBandsForRate(rate), num_render_channels);
+
+            for (size_t delay_samples : {15, 50, 150, 200, 800, 4000}) {
+              absl::optional<DelayEstimate> delay_blocks;
+              SCOPED_TRACE(ProduceDebugText(rate, delay_samples,
+                                            num_render_channels,
+                                            num_capture_channels));
+              std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+                  RenderDelayBuffer::Create(config, rate,
+                                            num_render_channels));
+              std::unique_ptr<RenderDelayController> delay_controller(
+                  RenderDelayController::Create(config, rate,
+                                                num_capture_channels));
+              DelayBuffer<float> signal_delay_buffer(delay_samples);
+              for (size_t k = 0; k < (400 + delay_samples / kBlockSize);
+                   ++k) {
+                for (int band = 0; band < render_block.NumBands(); ++band) {
+                  for (int channel = 0; channel < render_block.NumChannels();
+                       ++channel) {
+                    RandomizeSampleVector(&random_generator,
+                                          render_block.View(band, channel));
+                  }
+                }
+                signal_delay_buffer.Delay(
+                    render_block.View(/*band=*/0, /*channel=*/0),
+                    capture_block.View(/*band=*/0, /*channel=*/0));
+                render_delay_buffer->Insert(render_block);
+                render_delay_buffer->PrepareCaptureProcessing();
+                delay_blocks = delay_controller->GetDelay(
+                    render_delay_buffer->GetDownsampledRenderBuffer(),
+                    render_delay_buffer->Delay(), capture_block);
+              }
+              ASSERT_TRUE(!!delay_blocks);
+
+              constexpr int kDelayHeadroomBlocks = 1;
+              size_t expected_delay_blocks =
+                  std::max(0, static_cast<int>(delay_samples / kBlockSize) -
+                                  kDelayHeadroomBlocks);
+
+              EXPECT_EQ(expected_delay_blocks, delay_blocks->delay);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+// Verifies that the RenderDelayController is able to properly handle noncausal
+// delays.
+// TODO(bugs.webrtc.org/11161): Re-enable tests.
+TEST(RenderDelayController, DISABLED_NonCausalAlignment) {
+  Random random_generator(42U);
+  for (size_t num_capture_channels : {1, 2, 4}) {
+    for (size_t num_render_channels : {1, 2, 8}) {
+      for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+           num_matched_filters++) {
+        for (auto down_sampling_factor : kDownSamplingFactors) {
+          EchoCanceller3Config config;
+          config.delay.down_sampling_factor = down_sampling_factor;
+          config.delay.num_filters = num_matched_filters;
+          config.delay.capture_alignment_mixing.downmix = false;
+          config.delay.capture_alignment_mixing.adaptive_selection = false;
+          for (auto rate : {16000, 32000, 48000}) {
+            Block render_block(NumBandsForRate(rate), num_render_channels);
+            Block capture_block(NumBandsForRate(rate), num_capture_channels);
+
+            for (int delay_samples : {-15, -50, -150, -200}) {
+              absl::optional<DelayEstimate> delay_blocks;
+              SCOPED_TRACE(ProduceDebugText(rate, -delay_samples,
+                                            num_render_channels,
+                                            num_capture_channels));
+              std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+                  RenderDelayBuffer::Create(config, rate,
+                                            num_render_channels));
+              std::unique_ptr<RenderDelayController> delay_controller(
+                  RenderDelayController::Create(EchoCanceller3Config(), rate,
+                                                num_capture_channels));
+              DelayBuffer<float> signal_delay_buffer(-delay_samples);
+              for (int k = 0;
+                   k < (400 - delay_samples / static_cast<int>(kBlockSize));
+                   ++k) {
+                RandomizeSampleVector(
+                    &random_generator,
+                    capture_block.View(/*band=*/0, /*channel=*/0));
+                signal_delay_buffer.Delay(
+                    capture_block.View(/*band=*/0, /*channel=*/0),
+                    render_block.View(/*band=*/0, /*channel=*/0));
+                render_delay_buffer->Insert(render_block);
+                render_delay_buffer->PrepareCaptureProcessing();
+                delay_blocks = delay_controller->GetDelay(
+                    render_delay_buffer->GetDownsampledRenderBuffer(),
+                    render_delay_buffer->Delay(), capture_block);
+              }
+
+              ASSERT_FALSE(delay_blocks);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
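+// Worked example of the expectation below (explanatory note, not in the
+// upstream source): delay_samples = 800 and kBlockSize = 64 give
+// 800 / 64 = 12 blocks; subtracting kDelayHeadroomBlocks = 1 leaves an
+// expected buffer delay of 11 blocks, and expected delays below 2 blocks are
+// collapsed to 0 by the test.
+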
+// Verifies that the RenderDelayController is able to align the signals for
+// simple timeshifts between the signals when there is jitter in the API calls.
+// TODO(bugs.webrtc.org/11161): Re-enable tests.
+TEST(RenderDelayController, DISABLED_AlignmentWithJitter) {
+  Random random_generator(42U);
+  for (size_t num_capture_channels : {1, 2, 4}) {
+    for (size_t num_render_channels : {1, 2, 8}) {
+      Block capture_block(
+          /*num_bands=*/1, num_capture_channels);
+      for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+           num_matched_filters++) {
+        for (auto down_sampling_factor : kDownSamplingFactors) {
+          EchoCanceller3Config config;
+          config.delay.down_sampling_factor = down_sampling_factor;
+          config.delay.num_filters = num_matched_filters;
+          config.delay.capture_alignment_mixing.downmix = false;
+          config.delay.capture_alignment_mixing.adaptive_selection = false;
+
+          for (auto rate : {16000, 32000, 48000}) {
+            Block render_block(NumBandsForRate(rate), num_render_channels);
+            for (size_t delay_samples : {15, 50, 300, 800}) {
+              absl::optional<DelayEstimate> delay_blocks;
+              SCOPED_TRACE(ProduceDebugText(rate, delay_samples,
+                                            num_render_channels,
+                                            num_capture_channels));
+              std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+                  RenderDelayBuffer::Create(config, rate,
+                                            num_render_channels));
+              std::unique_ptr<RenderDelayController> delay_controller(
+                  RenderDelayController::Create(config, rate,
+                                                num_capture_channels));
+              DelayBuffer<float> signal_delay_buffer(delay_samples);
+              constexpr size_t kMaxTestJitterBlocks = 26;
+              for (size_t j = 0; j < (1000 + delay_samples / kBlockSize) /
+                                         kMaxTestJitterBlocks +
+                                     1;
+                   ++j) {
+                std::vector<Block> capture_block_buffer;
+                for (size_t k = 0; k < (kMaxTestJitterBlocks - 1); ++k) {
+                  RandomizeSampleVector(
+                      &random_generator,
+                      render_block.View(/*band=*/0, /*channel=*/0));
+                  signal_delay_buffer.Delay(
+                      render_block.View(/*band=*/0, /*channel=*/0),
+                      capture_block.View(/*band=*/0, /*channel=*/0));
+                  capture_block_buffer.push_back(capture_block);
+                  render_delay_buffer->Insert(render_block);
+                }
+                for (size_t k = 0; k < (kMaxTestJitterBlocks - 1); ++k) {
+                  render_delay_buffer->PrepareCaptureProcessing();
+                  delay_blocks = delay_controller->GetDelay(
+                      render_delay_buffer->GetDownsampledRenderBuffer(),
+                      render_delay_buffer->Delay(), capture_block_buffer[k]);
+                }
+              }
+
+              constexpr int kDelayHeadroomBlocks = 1;
+              size_t expected_delay_blocks =
+                  std::max(0, static_cast<int>(delay_samples / kBlockSize) -
+                                  kDelayHeadroomBlocks);
+              if (expected_delay_blocks < 2) {
+                expected_delay_blocks = 0;
+              }
+
+              ASSERT_TRUE(delay_blocks);
+              EXPECT_EQ(expected_delay_blocks, delay_blocks->delay);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for correct sample rate.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(RenderDelayControllerDeathTest, DISABLED_WrongSampleRate) {
+  for (auto rate : {-1, 0, 8001, 16001}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Config config;
+    std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+        RenderDelayBuffer::Create(config, rate, 1));
+    EXPECT_DEATH(
+        std::unique_ptr<RenderDelayController>(
+            RenderDelayController::Create(EchoCanceller3Config(), rate, 1)),
+        "");
+  }
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc
new file mode 100644
index 0000000000..bfbeb0ec2e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.cc
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+constexpr size_t kCounterThreshold = 5;
+
+// Identifies local bands with narrow characteristics.
+void IdentifySmallNarrowBandRegions(
+    const RenderBuffer& render_buffer,
+    const absl::optional<size_t>& delay_partitions,
+    std::array<size_t, kFftLengthBy2 - 1>* narrow_band_counters) {
+  RTC_DCHECK(narrow_band_counters);
+
+  if (!delay_partitions) {
+    narrow_band_counters->fill(0);
+    return;
+  }
+
+  std::array<size_t, kFftLengthBy2 - 1> channel_counters;
+  channel_counters.fill(0);
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
+      render_buffer.Spectrum(*delay_partitions);
+  for (size_t ch = 0; ch < X2.size(); ++ch) {
+    for (size_t k = 1; k < kFftLengthBy2; ++k) {
+      if (X2[ch][k] > 3 * std::max(X2[ch][k - 1], X2[ch][k + 1])) {
+        ++channel_counters[k - 1];
+      }
+    }
+  }
+  for (size_t k = 1; k < kFftLengthBy2; ++k) {
+    (*narrow_band_counters)[k - 1] =
+        channel_counters[k - 1] > 0 ? (*narrow_band_counters)[k - 1] + 1 : 0;
+  }
+}
+
+// Identifies whether the signal has a single strong narrow-band component.
+void IdentifyStrongNarrowBandComponent(const RenderBuffer& render_buffer,
+                                       int strong_peak_freeze_duration,
+                                       absl::optional<int>* narrow_peak_band,
+                                       size_t* narrow_peak_counter) {
+  RTC_DCHECK(narrow_peak_band);
+  RTC_DCHECK(narrow_peak_counter);
+  if (*narrow_peak_band &&
+      ++(*narrow_peak_counter) >
+          static_cast<size_t>(strong_peak_freeze_duration)) {
+    *narrow_peak_band = absl::nullopt;
+  }
+
+  const Block& x_latest = render_buffer.GetBlock(0);
+  float max_peak_level = 0.f;
+  for (int channel = 0; channel < x_latest.NumChannels(); ++channel) {
+    rtc::ArrayView<const float> X2_latest =
+        render_buffer.Spectrum(0)[channel];
+
+    // Identify the spectral peak.
+    const int peak_bin =
+        static_cast<int>(std::max_element(X2_latest.begin(), X2_latest.end()) -
+                         X2_latest.begin());
+
+    // Compute the level around the peak.
+    float non_peak_power = 0.f;
+    for (int k = std::max(0, peak_bin - 14); k < peak_bin - 4; ++k) {
+      non_peak_power = std::max(X2_latest[k], non_peak_power);
+    }
+    for (int k = peak_bin + 5;
+         k < std::min(peak_bin + 15, static_cast<int>(kFftLengthBy2Plus1));
+         ++k) {
+      non_peak_power = std::max(X2_latest[k], non_peak_power);
+    }
+
+    // Assess the render signal strength.
+    auto result0 = std::minmax_element(x_latest.begin(/*band=*/0, channel),
+                                       x_latest.end(/*band=*/0, channel));
+    float max_abs =
+        std::max(std::fabs(*result0.first), std::fabs(*result0.second));
+
+    if (x_latest.NumBands() > 1) {
+      const auto result1 =
+          std::minmax_element(x_latest.begin(/*band=*/1, channel),
+                              x_latest.end(/*band=*/1, channel));
+      max_abs =
+          std::max(max_abs,
+                   static_cast<float>(std::max(std::fabs(*result1.first),
+                                               std::fabs(*result1.second))));
+    }
+
+    // Detect whether the spectral peak has a strongly narrow-band nature.
+    const float peak_level = X2_latest[peak_bin];
+    if (peak_bin > 0 && max_abs > 100 && peak_level > 100 * non_peak_power) {
+      // Store the strongest peak across channels.
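+      // Explanatory note (not in the upstream source): the gate above demands
+      // a time-domain amplitude above 100 and a spectral peak at least 100x
+      // (20 dB in power) stronger than any bin 5 to 14 bins away from it;
+      // only the largest such peak across channels is stored below.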
+      if (peak_level > max_peak_level) {
+        max_peak_level = peak_level;
+        *narrow_peak_band = peak_bin;
+        *narrow_peak_counter = 0;
+      }
+    }
+  }
+}
+
+}  // namespace
+
+RenderSignalAnalyzer::RenderSignalAnalyzer(const EchoCanceller3Config& config)
+    : strong_peak_freeze_duration_(config.filter.refined.length_blocks) {
+  narrow_band_counters_.fill(0);
+}
+RenderSignalAnalyzer::~RenderSignalAnalyzer() = default;
+
+void RenderSignalAnalyzer::Update(
+    const RenderBuffer& render_buffer,
+    const absl::optional<size_t>& delay_partitions) {
+  // Identify bands of narrow nature.
+  IdentifySmallNarrowBandRegions(render_buffer, delay_partitions,
+                                 &narrow_band_counters_);
+
+  // Identify the presence of a strong narrow band.
+  IdentifyStrongNarrowBandComponent(render_buffer,
+                                    strong_peak_freeze_duration_,
+                                    &narrow_peak_band_, &narrow_peak_counter_);
+}
+
+void RenderSignalAnalyzer::MaskRegionsAroundNarrowBands(
+    std::array<float, kFftLengthBy2Plus1>* v) const {
+  RTC_DCHECK(v);
+
+  // Set v to zero around narrow band signal regions.
+  if (narrow_band_counters_[0] > kCounterThreshold) {
+    (*v)[1] = (*v)[0] = 0.f;
+  }
+  for (size_t k = 2; k < kFftLengthBy2 - 1; ++k) {
+    if (narrow_band_counters_[k - 1] > kCounterThreshold) {
+      (*v)[k - 2] = (*v)[k - 1] = (*v)[k] = (*v)[k + 1] = (*v)[k + 2] = 0.f;
+    }
+  }
+  if (narrow_band_counters_[kFftLengthBy2 - 2] > kCounterThreshold) {
+    (*v)[kFftLengthBy2] = (*v)[kFftLengthBy2 - 1] = 0.f;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.h b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.h
new file mode 100644
index 0000000000..2e4aaa4ba7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_
+
+#include <stddef.h>
+#include <algorithm>
+#include <array>
+
+#include "absl/types/optional.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Provides functionality for analyzing the properties of the render signal.
+class RenderSignalAnalyzer {
+ public:
+  explicit RenderSignalAnalyzer(const EchoCanceller3Config& config);
+  ~RenderSignalAnalyzer();
+
+  RenderSignalAnalyzer(const RenderSignalAnalyzer&) = delete;
+  RenderSignalAnalyzer& operator=(const RenderSignalAnalyzer&) = delete;
+
+  // Updates the render signal analysis with the most recent render signal.
+  void Update(const RenderBuffer& render_buffer,
+              const absl::optional<size_t>& delay_partitions);
+
+  // Returns true if the render signal is poorly exciting.
+  bool PoorSignalExcitation() const {
+    RTC_DCHECK_LT(2, narrow_band_counters_.size());
+    return std::any_of(narrow_band_counters_.begin(),
+                       narrow_band_counters_.end(),
+                       [](size_t a) { return a > 10; });
+  }
+
+  // Zeros the array around regions with narrow-band signal characteristics.
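+  // (Explanatory note, not in the upstream source: each bin whose counter has
+  // exceeded the internal threshold is zeroed in `v` together with two
+  // neighboring bins on each side, with clamping at the array edges.)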
+  void MaskRegionsAroundNarrowBands(
+      std::array<float, kFftLengthBy2Plus1>* v) const;
+
+  absl::optional<int> NarrowPeakBand() const { return narrow_peak_band_; }
+
+ private:
+  const int strong_peak_freeze_duration_;
+  std::array<size_t, kFftLengthBy2 - 1> narrow_band_counters_;
+  absl::optional<int> narrow_peak_band_;
+  size_t narrow_peak_counter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
new file mode 100644
index 0000000000..16f6280cb6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+
+#include <math.h>
+
+#include <array>
+#include <memory>
+#include <string>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kPi = 3.141592f;
+
+void ProduceSinusoidInNoise(int sample_rate_hz,
+                            size_t sinusoid_channel,
+                            float sinusoidal_frequency_hz,
+                            Random* random_generator,
+                            size_t* sample_counter,
+                            Block* x) {
+  // Fill x with low-amplitude noise.
+  for (int band = 0; band < x->NumBands(); ++band) {
+    for (int channel = 0; channel < x->NumChannels(); ++channel) {
+      RandomizeSampleVector(random_generator, x->View(band, channel),
+                            /*amplitude=*/500.f);
+    }
+  }
+  // Produce a sinusoid of the specified frequency in the specified channel.
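+  // The running *sample_counter keeps the sinusoid phase continuous across
+  // calls: sample index k maps to the phase 2 * pi * f * k / sample_rate_hz,
+  // so consecutive blocks extend one waveform instead of restarting it.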
+ for (size_t k = *sample_counter, j = 0; k < (*sample_counter + kBlockSize); + ++k, ++j) { + x->View(/*band=*/0, sinusoid_channel)[j] += + 32000.f * + std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz); + } + *sample_counter = *sample_counter + kBlockSize; +} + +void RunNarrowBandDetectionTest(size_t num_channels) { + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + Random random_generator(42U); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + Block x(kNumBands, num_channels); + std::array x_old; + Aec3Fft fft; + EchoCanceller3Config config; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_channels)); + + std::array mask; + x_old.fill(0.f); + constexpr int kSinusFrequencyBin = 32; + + auto generate_sinusoid_test = [&](bool known_delay) { + size_t sample_counter = 0; + for (size_t k = 0; k < 100; ++k) { + ProduceSinusoidInNoise(16000, num_channels - 1, + 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2, + &random_generator, &sample_counter, &x); + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + + analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + known_delay ? absl::optional(0) : absl::nullopt); + } + }; + + generate_sinusoid_test(true); + mask.fill(1.f); + analyzer.MaskRegionsAroundNarrowBands(&mask); + for (int k = 0; k < static_cast(mask.size()); ++k) { + EXPECT_EQ(abs(k - kSinusFrequencyBin) <= 2 ? 0.f : 1.f, mask[k]); + } + EXPECT_TRUE(analyzer.PoorSignalExcitation()); + EXPECT_TRUE(static_cast(analyzer.NarrowPeakBand())); + EXPECT_EQ(*analyzer.NarrowPeakBand(), 32); + + // Verify that no bands are detected as narrow when the delay is unknown. + generate_sinusoid_test(false); + mask.fill(1.f); + analyzer.MaskRegionsAroundNarrowBands(&mask); + std::for_each(mask.begin(), mask.end(), [](float a) { EXPECT_EQ(1.f, a); }); + EXPECT_FALSE(analyzer.PoorSignalExcitation()); +} + +std::string ProduceDebugText(size_t num_channels) { + rtc::StringBuilder ss; + ss << "number of channels: " << num_channels; + return ss.Release(); +} +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the check for non-null output parameter works. +TEST(RenderSignalAnalyzerDeathTest, NullMaskOutput) { + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + EXPECT_DEATH(analyzer.MaskRegionsAroundNarrowBands(nullptr), ""); +} + +#endif + +// Verify that no narrow bands are detected in a Gaussian noise signal. 
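+// The expectation is an all-ones mask, no poor-excitation flag, and an empty
+// narrow-peak band.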
+TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) { + for (auto num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(num_channels)); + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + Random random_generator(42U); + Block x(3, num_channels); + std::array x_old; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, num_channels)); + std::array mask; + x_old.fill(0.f); + + for (int k = 0; k < 100; ++k) { + for (int band = 0; band < x.NumBands(); ++band) { + for (int channel = 0; channel < x.NumChannels(); ++channel) { + RandomizeSampleVector(&random_generator, x.View(band, channel)); + } + } + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + + analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + absl::optional(0)); + } + + mask.fill(1.f); + analyzer.MaskRegionsAroundNarrowBands(&mask); + EXPECT_TRUE(std::all_of(mask.begin(), mask.end(), + [](float a) { return a == 1.f; })); + EXPECT_FALSE(analyzer.PoorSignalExcitation()); + EXPECT_FALSE(static_cast(analyzer.NarrowPeakBand())); + } +} + +// Verify that a sinusoid signal is detected as narrow bands. +TEST(RenderSignalAnalyzer, NarrowBandDetection) { + for (auto num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(num_channels)); + RunNarrowBandDetectionTest(num_channels); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc new file mode 100644 index 0000000000..640a3e3cb9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/residual_echo_estimator.h" + +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/reverb_model.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +constexpr float kDefaultTransparentModeGain = 0.01f; + +float GetTransparentModeGain() { + return kDefaultTransparentModeGain; +} + +float GetEarlyReflectionsDefaultModeGain( + const EchoCanceller3Config::EpStrength& config) { + if (field_trial::IsEnabled("WebRTC-Aec3UseLowEarlyReflectionsDefaultGain")) { + return 0.1f; + } + return config.default_gain; +} + +float GetLateReflectionsDefaultModeGain( + const EchoCanceller3Config::EpStrength& config) { + if (field_trial::IsEnabled("WebRTC-Aec3UseLowLateReflectionsDefaultGain")) { + return 0.1f; + } + return config.default_gain; +} + +bool UseErleOnsetCompensationInDominantNearend( + const EchoCanceller3Config::EpStrength& config) { + return config.erle_onset_compensation_in_dominant_nearend || + field_trial::IsEnabled( + "WebRTC-Aec3UseErleOnsetCompensationInDominantNearend"); +} + +// Computes the indexes that will be used for computing spectral power over +// the blocks surrounding the delay. 
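+// The window spans the blocks from (delay - render_pre_window_size) to
+// (delay + render_post_window_size), mapped onto circular-buffer indices via
+// SpectrumBuffer::OffsetIndex(); idx_stop points one past the last block.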
+void GetRenderIndexesToAnalyze( + const SpectrumBuffer& spectrum_buffer, + const EchoCanceller3Config::EchoModel& echo_model, + int filter_delay_blocks, + int* idx_start, + int* idx_stop) { + RTC_DCHECK(idx_start); + RTC_DCHECK(idx_stop); + size_t window_start; + size_t window_end; + window_start = + std::max(0, filter_delay_blocks - + static_cast(echo_model.render_pre_window_size)); + window_end = filter_delay_blocks + + static_cast(echo_model.render_post_window_size); + *idx_start = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_start); + *idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1); +} + +// Estimates the residual echo power based on the echo return loss enhancement +// (ERLE) and the linear power estimate. +void LinearEstimate( + rtc::ArrayView> S2_linear, + rtc::ArrayView> erle, + rtc::ArrayView> R2) { + RTC_DCHECK_EQ(S2_linear.size(), erle.size()); + RTC_DCHECK_EQ(S2_linear.size(), R2.size()); + + const size_t num_capture_channels = R2.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + RTC_DCHECK_LT(0.f, erle[ch][k]); + R2[ch][k] = S2_linear[ch][k] / erle[ch][k]; + } + } +} + +// Estimates the residual echo power based on the estimate of the echo path +// gain. +void NonLinearEstimate( + float echo_path_gain, + const std::array& X2, + rtc::ArrayView> R2) { + const size_t num_capture_channels = R2.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] = X2[k] * echo_path_gain; + } + } +} + +// Applies a soft noise gate to the echo generating power. +void ApplyNoiseGate(const EchoCanceller3Config::EchoModel& config, + rtc::ArrayView X2) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (config.noise_gate_power > X2[k]) { + X2[k] = std::max(0.f, X2[k] - config.noise_gate_slope * + (config.noise_gate_power - X2[k])); + } + } +} + +// Estimates the echo generating signal power as gated maximal power over a +// time window. 
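+// For a single render channel, the per-bin maximum over the analysis window
+// is used directly; for multiple channels, the per-channel powers are first
+// summed per bin and the maximum is taken over the summed spectra (see the
+// two branches below).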
+void EchoGeneratingPower(size_t num_render_channels, + const SpectrumBuffer& spectrum_buffer, + const EchoCanceller3Config::EchoModel& echo_model, + int filter_delay_blocks, + rtc::ArrayView X2) { + int idx_stop; + int idx_start; + GetRenderIndexesToAnalyze(spectrum_buffer, echo_model, filter_delay_blocks, + &idx_start, &idx_stop); + + std::fill(X2.begin(), X2.end(), 0.f); + if (num_render_channels == 1) { + for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) { + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + X2[j] = std::max(X2[j], spectrum_buffer.buffer[k][/*channel=*/0][j]); + } + } + } else { + for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) { + std::array render_power; + render_power.fill(0.f); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + const auto& channel_power = spectrum_buffer.buffer[k][ch]; + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + render_power[j] += channel_power[j]; + } + } + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + X2[j] = std::max(X2[j], render_power[j]); + } + } + } +} + +} // namespace + +ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config, + size_t num_render_channels) + : config_(config), + num_render_channels_(num_render_channels), + early_reflections_transparent_mode_gain_(GetTransparentModeGain()), + late_reflections_transparent_mode_gain_(GetTransparentModeGain()), + early_reflections_general_gain_( + GetEarlyReflectionsDefaultModeGain(config_.ep_strength)), + late_reflections_general_gain_( + GetLateReflectionsDefaultModeGain(config_.ep_strength)), + erle_onset_compensation_in_dominant_nearend_( + UseErleOnsetCompensationInDominantNearend(config_.ep_strength)) { + Reset(); +} + +ResidualEchoEstimator::~ResidualEchoEstimator() = default; + +void ResidualEchoEstimator::Estimate( + const AecState& aec_state, + const RenderBuffer& render_buffer, + rtc::ArrayView> S2_linear, + rtc::ArrayView> Y2, + bool dominant_nearend, + rtc::ArrayView> R2, + rtc::ArrayView> R2_unbounded) { + RTC_DCHECK_EQ(R2.size(), Y2.size()); + RTC_DCHECK_EQ(R2.size(), S2_linear.size()); + + const size_t num_capture_channels = R2.size(); + + // Estimate the power of the stationary noise in the render signal. + UpdateRenderNoisePower(render_buffer); + + // Estimate the residual echo power. + if (aec_state.UsableLinearEstimate()) { + // When there is saturated echo, assume the same spectral content as is + // present in the microphone signal. + if (aec_state.SaturatedEcho()) { + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin()); + std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin()); + } + } else { + const bool onset_compensated = + erle_onset_compensation_in_dominant_nearend_ || !dominant_nearend; + LinearEstimate(S2_linear, aec_state.Erle(onset_compensated), R2); + LinearEstimate(S2_linear, aec_state.ErleUnbounded(), R2_unbounded); + } + + UpdateReverb(ReverbType::kLinear, aec_state, render_buffer, + dominant_nearend); + AddReverb(R2); + AddReverb(R2_unbounded); + } else { + const float echo_path_gain = + GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true); + + // When there is saturated echo, assume the same spectral content as is + // present in the microphone signal. 
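+    // In that case R2 (and R2_unbounded) simply become copies of the capture
+    // spectrum Y2, which effectively treats the entire microphone signal as
+    // potential echo; this is a reading of the copy below, not upstream
+    // documentation.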
+ if (aec_state.SaturatedEcho()) { + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin()); + std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin()); + } + } else { + // Estimate the echo generating signal power. + std::array X2; + EchoGeneratingPower(num_render_channels_, + render_buffer.GetSpectrumBuffer(), config_.echo_model, + aec_state.MinDirectPathFilterDelay(), X2); + if (!aec_state.UseStationarityProperties()) { + ApplyNoiseGate(config_.echo_model, X2); + } + + // Subtract the stationary noise power to avoid stationary noise causing + // excessive echo suppression. + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + X2[k] -= config_.echo_model.stationary_gate_slope * X2_noise_floor_[k]; + X2[k] = std::max(0.f, X2[k]); + } + + NonLinearEstimate(echo_path_gain, X2, R2); + NonLinearEstimate(echo_path_gain, X2, R2_unbounded); + } + + if (config_.echo_model.model_reverb_in_nonlinear_mode && + !aec_state.TransparentModeActive()) { + UpdateReverb(ReverbType::kNonLinear, aec_state, render_buffer, + dominant_nearend); + AddReverb(R2); + AddReverb(R2_unbounded); + } + } + + if (aec_state.UseStationarityProperties()) { + // Scale the echo according to echo audibility. + std::array residual_scaling; + aec_state.GetResidualEchoScaling(residual_scaling); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] *= residual_scaling[k]; + R2_unbounded[ch][k] *= residual_scaling[k]; + } + } + } +} + +void ResidualEchoEstimator::Reset() { + echo_reverb_.Reset(); + X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold); + X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power); +} + +void ResidualEchoEstimator::UpdateRenderNoisePower( + const RenderBuffer& render_buffer) { + std::array render_power_data; + rtc::ArrayView> X2 = + render_buffer.Spectrum(0); + rtc::ArrayView render_power = + X2[/*channel=*/0]; + if (num_render_channels_ > 1) { + render_power_data.fill(0.f); + for (size_t ch = 0; ch < num_render_channels_; ++ch) { + const auto& channel_power = X2[ch]; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power_data[k] += channel_power[k]; + } + } + render_power = render_power_data; + } + + // Estimate the stationary noise power in a minimum statistics manner. + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + // Decrease rapidly. + if (render_power[k] < X2_noise_floor_[k]) { + X2_noise_floor_[k] = render_power[k]; + X2_noise_floor_counter_[k] = 0; + } else { + // Increase in a delayed, leaky manner. + if (X2_noise_floor_counter_[k] >= + static_cast(config_.echo_model.noise_floor_hold)) { + X2_noise_floor_[k] = std::max(X2_noise_floor_[k] * 1.1f, + config_.echo_model.min_noise_floor_power); + } else { + ++X2_noise_floor_counter_[k]; + } + } + } +} + +// Updates the reverb estimation. +void ResidualEchoEstimator::UpdateReverb(ReverbType reverb_type, + const AecState& aec_state, + const RenderBuffer& render_buffer, + bool dominant_nearend) { + // Choose reverb partition based on what type of echo power model is used. + const size_t first_reverb_partition = + reverb_type == ReverbType::kLinear + ? aec_state.FilterLengthBlocks() + 1 + : aec_state.MinDirectPathFilterDelay() + 1; + + // Compute render power for the reverb. 
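+  // As in UpdateRenderNoisePower() above: a mono render signal reuses the
+  // channel-0 spectrum directly, while multi-channel render sums the
+  // per-channel spectra into a scratch buffer.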
+ std::array render_power_data; + rtc::ArrayView> X2 = + render_buffer.Spectrum(first_reverb_partition); + rtc::ArrayView render_power = + X2[/*channel=*/0]; + if (num_render_channels_ > 1) { + render_power_data.fill(0.f); + for (size_t ch = 0; ch < num_render_channels_; ++ch) { + const auto& channel_power = X2[ch]; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power_data[k] += channel_power[k]; + } + } + render_power = render_power_data; + } + + // Update the reverb estimate. + float reverb_decay = aec_state.ReverbDecay(/*mild=*/dominant_nearend); + if (reverb_type == ReverbType::kLinear) { + echo_reverb_.UpdateReverb( + render_power, aec_state.GetReverbFrequencyResponse(), reverb_decay); + } else { + const float echo_path_gain = + GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/false); + echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain, + reverb_decay); + } +} +// Adds the estimated power of the reverb to the residual echo power. +void ResidualEchoEstimator::AddReverb( + rtc::ArrayView> R2) const { + const size_t num_capture_channels = R2.size(); + + // Add the reverb power. + rtc::ArrayView reverb_power = + echo_reverb_.reverb(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] += reverb_power[k]; + } + } +} + +// Chooses the echo path gain to use. +float ResidualEchoEstimator::GetEchoPathGain( + const AecState& aec_state, + bool gain_for_early_reflections) const { + float gain_amplitude; + if (aec_state.TransparentModeActive()) { + gain_amplitude = gain_for_early_reflections + ? early_reflections_transparent_mode_gain_ + : late_reflections_transparent_mode_gain_; + } else { + gain_amplitude = gain_for_early_reflections + ? early_reflections_general_gain_ + : late_reflections_general_gain_; + } + return gain_amplitude * gain_amplitude; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.h new file mode 100644 index 0000000000..c468764002 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/reverb_model.h" +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +class ResidualEchoEstimator { + public: + ResidualEchoEstimator(const EchoCanceller3Config& config, + size_t num_render_channels); + ~ResidualEchoEstimator(); + + ResidualEchoEstimator(const ResidualEchoEstimator&) = delete; + ResidualEchoEstimator& operator=(const ResidualEchoEstimator&) = delete; + + void Estimate( + const AecState& aec_state, + const RenderBuffer& render_buffer, + rtc::ArrayView> S2_linear, + rtc::ArrayView> Y2, + bool dominant_nearend, + rtc::ArrayView> R2, + rtc::ArrayView> R2_unbounded); + + private: + enum class ReverbType { kLinear, kNonLinear }; + + // Resets the state. + void Reset(); + + // Updates estimate for the power of the stationary noise component in the + // render signal. + void UpdateRenderNoisePower(const RenderBuffer& render_buffer); + + // Updates the reverb estimation. + void UpdateReverb(ReverbType reverb_type, + const AecState& aec_state, + const RenderBuffer& render_buffer, + bool dominant_nearend); + + // Adds the estimated unmodelled echo power to the residual echo power + // estimate. + void AddReverb( + rtc::ArrayView> R2) const; + + // Gets the echo path gain to apply. + float GetEchoPathGain(const AecState& aec_state, + bool gain_for_early_reflections) const; + + const EchoCanceller3Config config_; + const size_t num_render_channels_; + const float early_reflections_transparent_mode_gain_; + const float late_reflections_transparent_mode_gain_; + const float early_reflections_general_gain_; + const float late_reflections_general_gain_; + const bool erle_onset_compensation_in_dominant_nearend_; + std::array X2_noise_floor_; + std::array X2_noise_floor_counter_; + ReverbModel echo_reverb_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc new file mode 100644 index 0000000000..9a7bf0a89c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/residual_echo_estimator.h" + +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { +constexpr int kSampleRateHz = 48000; +constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); +constexpr float kEpsilon = 1e-4f; +} // namespace + +class ResidualEchoEstimatorTest { + public: + ResidualEchoEstimatorTest(size_t num_render_channels, + size_t num_capture_channels, + const EchoCanceller3Config& config) + : num_render_channels_(num_render_channels), + num_capture_channels_(num_capture_channels), + config_(config), + estimator_(config_, num_render_channels_), + aec_state_(config_, num_capture_channels_), + render_delay_buffer_(RenderDelayBuffer::Create(config_, + kSampleRateHz, + num_render_channels_)), + E2_refined_(num_capture_channels_), + S2_linear_(num_capture_channels_), + Y2_(num_capture_channels_), + R2_(num_capture_channels_), + R2_unbounded_(num_capture_channels_), + x_(kNumBands, num_render_channels_), + H2_(num_capture_channels_, + std::vector>(10)), + h_(num_capture_channels_, + std::vector( + GetTimeDomainLength(config_.filter.refined.length_blocks), + 0.0f)), + random_generator_(42U), + output_(num_capture_channels_) { + for (auto& H2_ch : H2_) { + for (auto& H2_k : H2_ch) { + H2_k.fill(0.01f); + } + H2_ch[2].fill(10.f); + H2_ch[2][0] = 0.1f; + } + + for (auto& subtractor_output : output_) { + subtractor_output.Reset(); + subtractor_output.s_refined.fill(100.f); + } + y_.fill(0.f); + + constexpr float kLevel = 10.f; + for (auto& E2_refined_ch : E2_refined_) { + E2_refined_ch.fill(kLevel); + } + S2_linear_[0].fill(kLevel); + for (auto& Y2_ch : Y2_) { + Y2_ch.fill(kLevel); + } + } + + void RunOneFrame(bool dominant_nearend) { + RandomizeSampleVector(&random_generator_, + x_.View(/*band=*/0, /*channel=*/0)); + render_delay_buffer_->Insert(x_); + if (first_frame_) { + render_delay_buffer_->Reset(); + first_frame_ = false; + } + render_delay_buffer_->PrepareCaptureProcessing(); + + aec_state_.Update(delay_estimate_, H2_, h_, + *render_delay_buffer_->GetRenderBuffer(), E2_refined_, + Y2_, output_); + + estimator_.Estimate(aec_state_, *render_delay_buffer_->GetRenderBuffer(), + S2_linear_, Y2_, dominant_nearend, R2_, R2_unbounded_); + } + + rtc::ArrayView> R2() const { + return R2_; + } + + private: + const size_t num_render_channels_; + const size_t num_capture_channels_; + const EchoCanceller3Config& config_; + ResidualEchoEstimator estimator_; + AecState aec_state_; + std::unique_ptr render_delay_buffer_; + std::vector> E2_refined_; + std::vector> S2_linear_; + std::vector> Y2_; + std::vector> R2_; + std::vector> R2_unbounded_; + Block x_; + std::vector>> H2_; + std::vector> h_; + Random random_generator_; + std::vector output_; + std::array y_; + absl::optional delay_estimate_; + bool first_frame_ = true; +}; + +class ResidualEchoEstimatorMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + ResidualEchoEstimatorMultiChannel, + ::testing::Combine(::testing::Values(1, 2, 4), + ::testing::Values(1, 2, 4))); + +TEST_P(ResidualEchoEstimatorMultiChannel, BasicTest) { + const size_t 
num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + + EchoCanceller3Config config; + ResidualEchoEstimatorTest residual_echo_estimator_test( + num_render_channels, num_capture_channels, config); + for (int k = 0; k < 1993; ++k) { + residual_echo_estimator_test.RunOneFrame(/*dominant_nearend=*/false); + } +} + +TEST(ResidualEchoEstimatorMultiChannel, ReverbTest) { + const size_t num_render_channels = 1; + const size_t num_capture_channels = 1; + const size_t nFrames = 100; + + EchoCanceller3Config reference_config; + reference_config.ep_strength.default_len = 0.95f; + reference_config.ep_strength.nearend_len = 0.95f; + EchoCanceller3Config config_use_nearend_len = reference_config; + config_use_nearend_len.ep_strength.default_len = 0.95f; + config_use_nearend_len.ep_strength.nearend_len = 0.83f; + + ResidualEchoEstimatorTest reference_residual_echo_estimator_test( + num_render_channels, num_capture_channels, reference_config); + ResidualEchoEstimatorTest use_nearend_len_residual_echo_estimator_test( + num_render_channels, num_capture_channels, config_use_nearend_len); + + std::vector acum_energy_reference_R2(num_capture_channels, 0.0f); + std::vector acum_energy_R2(num_capture_channels, 0.0f); + for (size_t frame = 0; frame < nFrames; ++frame) { + bool dominant_nearend = frame <= nFrames / 2 ? false : true; + reference_residual_echo_estimator_test.RunOneFrame(dominant_nearend); + use_nearend_len_residual_echo_estimator_test.RunOneFrame(dominant_nearend); + const auto& reference_R2 = reference_residual_echo_estimator_test.R2(); + const auto& R2 = use_nearend_len_residual_echo_estimator_test.R2(); + ASSERT_EQ(reference_R2.size(), R2.size()); + for (size_t ch = 0; ch < reference_R2.size(); ++ch) { + float energy_reference_R2 = std::accumulate( + reference_R2[ch].cbegin(), reference_R2[ch].cend(), 0.0f); + float energy_R2 = std::accumulate(R2[ch].cbegin(), R2[ch].cend(), 0.0f); + if (dominant_nearend) { + EXPECT_GE(energy_reference_R2, energy_R2); + } else { + EXPECT_NEAR(energy_reference_R2, energy_R2, kEpsilon); + } + acum_energy_reference_R2[ch] += energy_reference_R2; + acum_energy_R2[ch] += energy_R2; + } + if (frame == nFrames / 2 || frame == nFrames - 1) { + for (size_t ch = 0; ch < acum_energy_reference_R2.size(); ch++) { + if (dominant_nearend) { + EXPECT_GT(acum_energy_reference_R2[ch], acum_energy_R2[ch]); + } else { + EXPECT_NEAR(acum_energy_reference_R2[ch], acum_energy_R2[ch], + kEpsilon); + } + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc new file mode 100644 index 0000000000..2daf376911 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc @@ -0,0 +1,410 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/reverb_decay_estimator.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kEarlyReverbMinSizeBlocks = 3;
+constexpr int kBlocksPerSection = 6;
+// Linear regression approach assumes symmetric index around 0.
+constexpr float kEarlyReverbFirstPointAtLinearRegressors =
+    -0.5f * kBlocksPerSection * kFftLengthBy2 + 0.5f;
+
+// Averages the values in a block of size kFftLengthBy2.
+float BlockAverage(rtc::ArrayView<const float> v, size_t block_index) {
+  constexpr float kOneByFftLengthBy2 = 1.f / kFftLengthBy2;
+  const int i = block_index * kFftLengthBy2;
+  RTC_DCHECK_GE(v.size(), i + kFftLengthBy2);
+  const float sum =
+      std::accumulate(v.begin() + i, v.begin() + i + kFftLengthBy2, 0.f);
+  return sum * kOneByFftLengthBy2;
+}
+
+// Analyzes the gain in a block.
+void AnalyzeBlockGain(const std::array<float, kFftLengthBy2>& h2,
+                      float floor_gain,
+                      float* previous_gain,
+                      bool* block_adapting,
+                      bool* decaying_gain) {
+  float gain = std::max(BlockAverage(h2, 0), 1e-32f);
+  *block_adapting =
+      *previous_gain > 1.1f * gain || *previous_gain < 0.9f * gain;
+  *decaying_gain = gain > floor_gain;
+  *previous_gain = gain;
+}
+
+// Arithmetic sum of $2 \sum_{i=0.5}^{(N-1)/2}i^2$ calculated directly.
+constexpr float SymmetricArithmetricSum(int N) {
+  return N * (N * N - 1.0f) * (1.f / 12.f);
+}
+
+// Returns the peak energy of an impulse response.
+float BlockEnergyPeak(rtc::ArrayView<const float> h, int peak_block) {
+  RTC_DCHECK_LE((peak_block + 1) * kFftLengthBy2, h.size());
+  RTC_DCHECK_GE(peak_block, 0);
+  float peak_value =
+      *std::max_element(h.begin() + peak_block * kFftLengthBy2,
+                        h.begin() + (peak_block + 1) * kFftLengthBy2,
+                        [](float a, float b) { return a * a < b * b; });
+  return peak_value * peak_value;
+}
+
+// Returns the average energy of an impulse response block.
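+// That is, the mean of the squared coefficients within the block:
+// (1 / kFftLengthBy2) * sum_k h[k]^2.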
+float BlockEnergyAverage(rtc::ArrayView h, int block_index) { + RTC_DCHECK_LE((block_index + 1) * kFftLengthBy2, h.size()); + RTC_DCHECK_GE(block_index, 0); + constexpr float kOneByFftLengthBy2 = 1.f / kFftLengthBy2; + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + return std::accumulate(h.begin() + block_index * kFftLengthBy2, + h.begin() + (block_index + 1) * kFftLengthBy2, 0.f, + sum_of_squares) * + kOneByFftLengthBy2; +} + +} // namespace + +ReverbDecayEstimator::ReverbDecayEstimator(const EchoCanceller3Config& config) + : filter_length_blocks_(config.filter.refined.length_blocks), + filter_length_coefficients_(GetTimeDomainLength(filter_length_blocks_)), + use_adaptive_echo_decay_(config.ep_strength.default_len < 0.f), + early_reverb_estimator_(config.filter.refined.length_blocks - + kEarlyReverbMinSizeBlocks), + late_reverb_start_(kEarlyReverbMinSizeBlocks), + late_reverb_end_(kEarlyReverbMinSizeBlocks), + previous_gains_(config.filter.refined.length_blocks, 0.f), + decay_(std::fabs(config.ep_strength.default_len)), + mild_decay_(std::fabs(config.ep_strength.nearend_len)) { + RTC_DCHECK_GT(config.filter.refined.length_blocks, + static_cast(kEarlyReverbMinSizeBlocks)); +} + +ReverbDecayEstimator::~ReverbDecayEstimator() = default; + +void ReverbDecayEstimator::Update(rtc::ArrayView filter, + const absl::optional& filter_quality, + int filter_delay_blocks, + bool usable_linear_filter, + bool stationary_signal) { + const int filter_size = static_cast(filter.size()); + + if (stationary_signal) { + return; + } + + bool estimation_feasible = + filter_delay_blocks <= + filter_length_blocks_ - kEarlyReverbMinSizeBlocks - 1; + estimation_feasible = + estimation_feasible && filter_size == filter_length_coefficients_; + estimation_feasible = estimation_feasible && filter_delay_blocks > 0; + estimation_feasible = estimation_feasible && usable_linear_filter; + + if (!estimation_feasible) { + ResetDecayEstimation(); + return; + } + + if (!use_adaptive_echo_decay_) { + return; + } + + const float new_smoothing = filter_quality ? *filter_quality * 0.2f : 0.f; + smoothing_constant_ = std::max(new_smoothing, smoothing_constant_); + if (smoothing_constant_ == 0.f) { + return; + } + + if (block_to_analyze_ < filter_length_blocks_) { + // Analyze the filter and accumulate data for reverb estimation. + AnalyzeFilter(filter); + ++block_to_analyze_; + } else { + // When the filter is fully analyzed, estimate the reverb decay and reset + // the block_to_analyze_ counter. + EstimateDecay(filter, filter_delay_blocks); + } +} + +void ReverbDecayEstimator::ResetDecayEstimation() { + early_reverb_estimator_.Reset(); + late_reverb_decay_estimator_.Reset(0); + block_to_analyze_ = 0; + estimation_region_candidate_size_ = 0; + estimation_region_identified_ = false; + smoothing_constant_ = 0.f; + late_reverb_start_ = 0; + late_reverb_end_ = 0; +} + +void ReverbDecayEstimator::EstimateDecay(rtc::ArrayView filter, + int peak_block) { + auto& h = filter; + RTC_DCHECK_EQ(0, h.size() % kFftLengthBy2); + + // Reset the block analysis counter. + block_to_analyze_ = + std::min(peak_block + kEarlyReverbMinSizeBlocks, filter_length_blocks_); + + // To estimate the reverb decay, the energy of the first filter section must + // be substantially larger than the last. Also, the first filter section + // energy must not deviate too much from the max peak. 
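+  // The thresholds below encode this: a sufficient decay requires the
+  // post-peak block energy to exceed the tail energy by a factor of 4, and a
+  // valid filter requires a factor of 2 together with a peak energy below
+  // 100.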
+  const float first_reverb_gain = BlockEnergyAverage(h, block_to_analyze_);
+  const size_t h_size_blocks = h.size() >> kFftLengthBy2Log2;
+  tail_gain_ = BlockEnergyAverage(h, h_size_blocks - 1);
+  float peak_energy = BlockEnergyPeak(h, peak_block);
+  const bool sufficient_reverb_decay = first_reverb_gain > 4.f * tail_gain_;
+  const bool valid_filter =
+      first_reverb_gain > 2.f * tail_gain_ && peak_energy < 100.f;
+
+  // Estimate the size of the regions with early and late reflections.
+  const int size_early_reverb = early_reverb_estimator_.Estimate();
+  const int size_late_reverb =
+      std::max(estimation_region_candidate_size_ - size_early_reverb, 0);
+
+  // Only update the reverb decay estimate if the size of the identified late
+  // reverb is sufficiently large.
+  if (size_late_reverb >= 5) {
+    if (valid_filter && late_reverb_decay_estimator_.EstimateAvailable()) {
+      float decay = std::pow(
+          2.0f, late_reverb_decay_estimator_.Estimate() * kFftLengthBy2);
+      constexpr float kMaxDecay = 0.95f;  // ~1 sec min RT60.
+      constexpr float kMinDecay = 0.02f;  // ~15 ms max RT60.
+      decay = std::max(.97f * decay_, decay);
+      decay = std::min(decay, kMaxDecay);
+      decay = std::max(decay, kMinDecay);
+      decay_ += smoothing_constant_ * (decay - decay_);
+    }
+
+    // Update length of decay. Must have enough data (number of sections) in
+    // order to estimate decay rate.
+    late_reverb_decay_estimator_.Reset(size_late_reverb * kFftLengthBy2);
+    late_reverb_start_ =
+        peak_block + kEarlyReverbMinSizeBlocks + size_early_reverb;
+    late_reverb_end_ =
+        block_to_analyze_ + estimation_region_candidate_size_ - 1;
+  } else {
+    late_reverb_decay_estimator_.Reset(0);
+    late_reverb_start_ = 0;
+    late_reverb_end_ = 0;
+  }
+
+  // Reset variables for the identification of the region for reverb decay
+  // estimation.
+  estimation_region_identified_ = !(valid_filter && sufficient_reverb_decay);
+  estimation_region_candidate_size_ = 0;
+
+  // Stop estimation of the decay until another good filter is received.
+  smoothing_constant_ = 0.f;
+
+  // Reset early reflections detector.
+  early_reverb_estimator_.Reset();
+}
+
+void ReverbDecayEstimator::AnalyzeFilter(rtc::ArrayView<const float> filter) {
+  auto h = rtc::ArrayView<const float>(
+      filter.begin() + block_to_analyze_ * kFftLengthBy2, kFftLengthBy2);
+
+  // Compute squared filter coefficients for the block to analyze_.
+  std::array<float, kFftLengthBy2> h2;
+  std::transform(h.begin(), h.end(), h2.begin(),
+                 [](float a) { return a * a; });
+
+  // Map out the region for estimating the reverb decay.
+  bool adapting;
+  bool above_noise_floor;
+  AnalyzeBlockGain(h2, tail_gain_, &previous_gains_[block_to_analyze_],
+                   &adapting, &above_noise_floor);
+
+  // Count consecutive number of "good" filter sections, where "good" means:
+  // 1) energy is above noise floor.
+  // 2) energy of current section has not changed too much from last check.
+  estimation_region_identified_ =
+      estimation_region_identified_ || adapting || !above_noise_floor;
+  if (!estimation_region_identified_) {
+    ++estimation_region_candidate_size_;
+  }
+
+  // Accumulate data for reverb decay estimation and for the estimation of
+  // early reflections.
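+  // The regressors operate on log2 of the squared coefficients (floored by a
+  // small constant to avoid log of zero); blocks inside the late-reverb
+  // region feed both estimators, earlier blocks only the early-reverb one.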
+ if (block_to_analyze_ <= late_reverb_end_) { + if (block_to_analyze_ >= late_reverb_start_) { + for (float h2_k : h2) { + float h2_log2 = FastApproxLog2f(h2_k + 1e-10); + late_reverb_decay_estimator_.Accumulate(h2_log2); + early_reverb_estimator_.Accumulate(h2_log2, smoothing_constant_); + } + } else { + for (float h2_k : h2) { + float h2_log2 = FastApproxLog2f(h2_k + 1e-10); + early_reverb_estimator_.Accumulate(h2_log2, smoothing_constant_); + } + } + } +} + +void ReverbDecayEstimator::Dump(ApmDataDumper* data_dumper) const { + data_dumper->DumpRaw("aec3_reverb_decay", decay_); + data_dumper->DumpRaw("aec3_reverb_tail_energy", tail_gain_); + data_dumper->DumpRaw("aec3_reverb_alpha", smoothing_constant_); + data_dumper->DumpRaw("aec3_num_reverb_decay_blocks", + late_reverb_end_ - late_reverb_start_); + data_dumper->DumpRaw("aec3_late_reverb_start", late_reverb_start_); + data_dumper->DumpRaw("aec3_late_reverb_end", late_reverb_end_); + early_reverb_estimator_.Dump(data_dumper); +} + +void ReverbDecayEstimator::LateReverbLinearRegressor::Reset( + int num_data_points) { + RTC_DCHECK_LE(0, num_data_points); + RTC_DCHECK_EQ(0, num_data_points % 2); + const int N = num_data_points; + nz_ = 0.f; + // Arithmetic sum of $2 \sum_{i=0.5}^{(N-1)/2}i^2$ calculated directly. + nn_ = SymmetricArithmetricSum(N); + // The linear regression approach assumes symmetric index around 0. + count_ = N > 0 ? -N * 0.5f + 0.5f : 0.f; + N_ = N; + n_ = 0; +} + +void ReverbDecayEstimator::LateReverbLinearRegressor::Accumulate(float z) { + nz_ += count_ * z; + ++count_; + ++n_; +} + +float ReverbDecayEstimator::LateReverbLinearRegressor::Estimate() { + RTC_DCHECK(EstimateAvailable()); + if (nn_ == 0.f) { + RTC_DCHECK_NOTREACHED(); + return 0.f; + } + return nz_ / nn_; +} + +ReverbDecayEstimator::EarlyReverbLengthEstimator::EarlyReverbLengthEstimator( + int max_blocks) + : numerators_smooth_(max_blocks - kBlocksPerSection, 0.f), + numerators_(numerators_smooth_.size(), 0.f), + coefficients_counter_(0) { + RTC_DCHECK_LE(0, max_blocks); +} + +ReverbDecayEstimator::EarlyReverbLengthEstimator:: + ~EarlyReverbLengthEstimator() = default; + +void ReverbDecayEstimator::EarlyReverbLengthEstimator::Reset() { + coefficients_counter_ = 0; + std::fill(numerators_.begin(), numerators_.end(), 0.f); + block_counter_ = 0; +} + +void ReverbDecayEstimator::EarlyReverbLengthEstimator::Accumulate( + float value, + float smoothing) { + // Each section is composed by kBlocksPerSection blocks and each section + // overlaps with the next one in (kBlocksPerSection - 1) blocks. For example, + // the first section covers the blocks [0:5], the second covers the blocks + // [1:6] and so on. As a result, for each value, kBlocksPerSection sections + // need to be updated. + int first_section_index = std::max(block_counter_ - kBlocksPerSection + 1, 0); + int last_section_index = + std::min(block_counter_, static_cast(numerators_.size() - 1)); + float x_value = static_cast(coefficients_counter_) + + kEarlyReverbFirstPointAtLinearRegressors; + const float value_to_inc = kFftLengthBy2 * value; + float value_to_add = + x_value * value + (block_counter_ - last_section_index) * value_to_inc; + for (int section = last_section_index; section >= first_section_index; + --section, value_to_add += value_to_inc) { + numerators_[section] += value_to_add; + } + + // Check if this update was the last coefficient of the current block. 
In that + // case, check if we are at the end of one of the sections and update the + // numerator of the linear regressor that is computed in such section. + if (++coefficients_counter_ == kFftLengthBy2) { + if (block_counter_ >= (kBlocksPerSection - 1)) { + size_t section = block_counter_ - (kBlocksPerSection - 1); + RTC_DCHECK_GT(numerators_.size(), section); + RTC_DCHECK_GT(numerators_smooth_.size(), section); + numerators_smooth_[section] += + smoothing * (numerators_[section] - numerators_smooth_[section]); + n_sections_ = section + 1; + } + ++block_counter_; + coefficients_counter_ = 0; + } +} + +// Estimates the size in blocks of the early reverb. The estimation is done by +// comparing the tilt that is estimated in each section. As an optimization +// detail and due to the fact that all the linear regressors that are computed +// shared the same denominator, the comparison of the tilts is done by a +// comparison of the numerator of the linear regressors. +int ReverbDecayEstimator::EarlyReverbLengthEstimator::Estimate() { + constexpr float N = kBlocksPerSection * kFftLengthBy2; + constexpr float nn = SymmetricArithmetricSum(N); + // numerator_11 refers to the quantity that the linear regressor needs in the + // numerator for getting a decay equal to 1.1 (which is not a decay). + // log2(1.1) * nn / kFftLengthBy2. + constexpr float numerator_11 = 0.13750352374993502f * nn / kFftLengthBy2; + // log2(0.8) * nn / kFftLengthBy2. + constexpr float numerator_08 = -0.32192809488736229f * nn / kFftLengthBy2; + constexpr int kNumSectionsToAnalyze = 9; + + if (n_sections_ < kNumSectionsToAnalyze) { + return 0; + } + + // Estimation of the blocks that correspond to early reverberations. The + // estimation is done by analyzing the impulse response. The portions of the + // impulse response whose energy is not decreasing over its coefficients are + // considered to be part of the early reverberations. Furthermore, the blocks + // where the energy is decreasing faster than what it does at the end of the + // impulse response are also considered to be part of the early + // reverberations. The estimation is limited to the first + // kNumSectionsToAnalyze sections. + + RTC_DCHECK_LE(n_sections_, numerators_smooth_.size()); + const float min_numerator_tail = + *std::min_element(numerators_smooth_.begin() + kNumSectionsToAnalyze, + numerators_smooth_.begin() + n_sections_); + int early_reverb_size_minus_1 = 0; + for (int k = 0; k < kNumSectionsToAnalyze; ++k) { + if ((numerators_smooth_[k] > numerator_11) || + (numerators_smooth_[k] < numerator_08 && + numerators_smooth_[k] < 0.9f * min_numerator_tail)) { + early_reverb_size_minus_1 = k; + } + } + + return early_reverb_size_minus_1 == 0 ? 0 : early_reverb_size_minus_1 + 1; +} + +void ReverbDecayEstimator::EarlyReverbLengthEstimator::Dump( + ApmDataDumper* data_dumper) const { + data_dumper->DumpRaw("aec3_er_acum_numerator", numerators_smooth_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.h new file mode 100644 index 0000000000..fee54210e6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_decay_estimator.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_DECAY_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_DECAY_ESTIMATOR_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" // kMaxAdaptiveFilter... + +namespace webrtc { + +class ApmDataDumper; +struct EchoCanceller3Config; + +// Class for estimating the decay of the late reverb. +class ReverbDecayEstimator { + public: + explicit ReverbDecayEstimator(const EchoCanceller3Config& config); + ~ReverbDecayEstimator(); + // Updates the decay estimate. + void Update(rtc::ArrayView filter, + const absl::optional& filter_quality, + int filter_delay_blocks, + bool usable_linear_filter, + bool stationary_signal); + // Returns the decay for the exponential model. The parameter `mild` indicates + // which exponential decay to return, the default one or a milder one. + float Decay(bool mild) const { + if (use_adaptive_echo_decay_) { + return decay_; + } else { + return mild ? mild_decay_ : decay_; + } + } + // Dumps debug data. + void Dump(ApmDataDumper* data_dumper) const; + + private: + void EstimateDecay(rtc::ArrayView filter, int peak_block); + void AnalyzeFilter(rtc::ArrayView filter); + + void ResetDecayEstimation(); + + // Class for estimating the decay of the late reverb from the linear filter. + class LateReverbLinearRegressor { + public: + // Resets the estimator to receive a specified number of data points. + void Reset(int num_data_points); + // Accumulates estimation data. + void Accumulate(float z); + // Estimates the decay. + float Estimate(); + // Returns whether an estimate is available. + bool EstimateAvailable() const { return n_ == N_ && N_ != 0; } + + public: + float nz_ = 0.f; + float nn_ = 0.f; + float count_ = 0.f; + int N_ = 0; + int n_ = 0; + }; + + // Class for identifying the length of the early reverb from the linear + // filter. For identifying the early reverberations, the impulse response is + // divided in sections and the tilt of each section is computed by a linear + // regressor. + class EarlyReverbLengthEstimator { + public: + explicit EarlyReverbLengthEstimator(int max_blocks); + ~EarlyReverbLengthEstimator(); + + // Resets the estimator. + void Reset(); + // Accumulates estimation data. + void Accumulate(float value, float smoothing); + // Estimates the size in blocks of the early reverb. + int Estimate(); + // Dumps debug data. 
+ void Dump(ApmDataDumper* data_dumper) const; + + private: + std::vector numerators_smooth_; + std::vector numerators_; + int coefficients_counter_; + int block_counter_ = 0; + int n_sections_ = 0; + }; + + const int filter_length_blocks_; + const int filter_length_coefficients_; + const bool use_adaptive_echo_decay_; + LateReverbLinearRegressor late_reverb_decay_estimator_; + EarlyReverbLengthEstimator early_reverb_estimator_; + int late_reverb_start_; + int late_reverb_end_; + int block_to_analyze_ = 0; + int estimation_region_candidate_size_ = 0; + bool estimation_region_identified_ = false; + std::vector previous_gains_; + float decay_; + float mild_decay_; + float tail_gain_ = 0.f; + float smoothing_constant_ = 0.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_REVERB_DECAY_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc new file mode 100644 index 0000000000..6e7282a1fc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/reverb_frequency_response.h" + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { + +// Computes the ratio of the energies between the direct path and the tail. The +// energy is computed in the power spectrum domain discarding the DC +// contributions. 
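+// In other words, the returned value is
+//   sum_{k>=1} freq_resp_tail[k] / sum_{k>=1} freq_resp_direct_path[k],
+// with 0 returned when the direct-path energy is zero.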
+float AverageDecayWithinFilter( + rtc::ArrayView freq_resp_direct_path, + rtc::ArrayView freq_resp_tail) { + // Skipping the DC for the ratio computation + constexpr size_t kSkipBins = 1; + RTC_CHECK_EQ(freq_resp_direct_path.size(), freq_resp_tail.size()); + + float direct_path_energy = + std::accumulate(freq_resp_direct_path.begin() + kSkipBins, + freq_resp_direct_path.end(), 0.f); + + if (direct_path_energy == 0.f) { + return 0.f; + } + + float tail_energy = std::accumulate(freq_resp_tail.begin() + kSkipBins, + freq_resp_tail.end(), 0.f); + return tail_energy / direct_path_energy; +} + +} // namespace + +ReverbFrequencyResponse::ReverbFrequencyResponse( + bool use_conservative_tail_frequency_response) + : use_conservative_tail_frequency_response_( + use_conservative_tail_frequency_response) { + tail_response_.fill(0.0f); +} + +ReverbFrequencyResponse::~ReverbFrequencyResponse() = default; + +void ReverbFrequencyResponse::Update( + const std::vector>& + frequency_response, + int filter_delay_blocks, + const absl::optional& linear_filter_quality, + bool stationary_block) { + if (stationary_block || !linear_filter_quality) { + return; + } + + Update(frequency_response, filter_delay_blocks, *linear_filter_quality); +} + +void ReverbFrequencyResponse::Update( + const std::vector>& + frequency_response, + int filter_delay_blocks, + float linear_filter_quality) { + rtc::ArrayView freq_resp_tail( + frequency_response[frequency_response.size() - 1]); + + rtc::ArrayView freq_resp_direct_path( + frequency_response[filter_delay_blocks]); + + float average_decay = + AverageDecayWithinFilter(freq_resp_direct_path, freq_resp_tail); + + const float smoothing = 0.2f * linear_filter_quality; + average_decay_ += smoothing * (average_decay - average_decay_); + + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + tail_response_[k] = freq_resp_direct_path[k] * average_decay_; + } + + if (use_conservative_tail_frequency_response_) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + tail_response_[k] = std::max(freq_resp_tail[k], tail_response_[k]); + } + } + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + const float avg_neighbour = + 0.5f * (tail_response_[k - 1] + tail_response_[k + 1]); + tail_response_[k] = std::max(tail_response_[k], avg_neighbour); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h new file mode 100644 index 0000000000..69b16b54d0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_frequency_response.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Class for updating the frequency response for the reverb. 
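+// The tail estimate is the direct-path response scaled by a smoothed average
+// decay, optionally lower-bounded by the measured tail response, and smoothed
+// so that no bin dips below the average of its neighbors. A minimal usage
+// sketch (the local variable names are illustrative only):
+//
+//   ReverbFrequencyResponse response(
+//       /*use_conservative_tail_frequency_response=*/true);
+//   response.Update(frequency_response, filter_delay_blocks,
+//                   linear_filter_quality, /*stationary_block=*/false);
+//   rtc::ArrayView<const float> tail = response.FrequencyResponse();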
+class ReverbFrequencyResponse { + public: + explicit ReverbFrequencyResponse( + bool use_conservative_tail_frequency_response); + ~ReverbFrequencyResponse(); + + // Updates the frequency response estimate of the reverb. + void Update(const std::vector>& + frequency_response, + int filter_delay_blocks, + const absl::optional& linear_filter_quality, + bool stationary_block); + + // Returns the estimated frequency response for the reverb. + rtc::ArrayView FrequencyResponse() const { + return tail_response_; + } + + private: + void Update(const std::vector>& + frequency_response, + int filter_delay_blocks, + float linear_filter_quality); + + const bool use_conservative_tail_frequency_response_; + float average_decay_ = 0.f; + std::array tail_response_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc new file mode 100644 index 0000000000..e4f3507d31 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/reverb_model.h" + +#include + +#include +#include + +#include "api/array_view.h" + +namespace webrtc { + +ReverbModel::ReverbModel() { + Reset(); +} + +ReverbModel::~ReverbModel() = default; + +void ReverbModel::Reset() { + reverb_.fill(0.); +} + +void ReverbModel::UpdateReverbNoFreqShaping( + rtc::ArrayView power_spectrum, + float power_spectrum_scaling, + float reverb_decay) { + if (reverb_decay > 0) { + // Update the estimate of the reverberant power. + for (size_t k = 0; k < power_spectrum.size(); ++k) { + reverb_[k] = (reverb_[k] + power_spectrum[k] * power_spectrum_scaling) * + reverb_decay; + } + } +} + +void ReverbModel::UpdateReverb( + rtc::ArrayView power_spectrum, + rtc::ArrayView power_spectrum_scaling, + float reverb_decay) { + if (reverb_decay > 0) { + // Update the estimate of the reverberant power. + for (size_t k = 0; k < power_spectrum.size(); ++k) { + reverb_[k] = + (reverb_[k] + power_spectrum[k] * power_spectrum_scaling[k]) * + reverb_decay; + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.h b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.h new file mode 100644 index 0000000000..5ba54853da --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_H_
+
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+
+namespace webrtc {
+
+// The ReverbModel class describes an exponential reverberant model
+// that can be applied over power spectrums.
+class ReverbModel {
+ public:
+  ReverbModel();
+  ~ReverbModel();
+
+  // Resets the state.
+  void Reset();
+
+  // Returns the reverb.
+  rtc::ArrayView<const float> reverb() const {
+    return reverb_;
+  }
+
+  // The methods UpdateReverbNoFreqShaping and UpdateReverb update the
+  // estimate of the reverberation contribution to an input/output power
+  // spectrum. Before applying the exponential reverberant model, the input
+  // power spectrum is pre-scaled. Use the method UpdateReverb when a different
+  // scaling should be applied per frequency and UpdateReverbNoFreqShaping if
+  // the same scaling should be used for all the frequencies.
+  void UpdateReverbNoFreqShaping(rtc::ArrayView<const float> power_spectrum,
+                                 float power_spectrum_scaling,
+                                 float reverb_decay);
+
+  // Updates the reverb based on new data.
+  void UpdateReverb(rtc::ArrayView<const float> power_spectrum,
+                    rtc::ArrayView<const float> power_spectrum_scaling,
+                    float reverb_decay);
+
+ private:
+  std::array<float, kFftLengthBy2Plus1> reverb_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc
new file mode 100644
index 0000000000..5cd7a7870d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/reverb_model_estimator.h"
+
+namespace webrtc {
+
+ReverbModelEstimator::ReverbModelEstimator(const EchoCanceller3Config& config,
+                                           size_t num_capture_channels)
+    : reverb_decay_estimators_(num_capture_channels),
+      reverb_frequency_responses_(
+          num_capture_channels,
+          ReverbFrequencyResponse(
+              config.ep_strength.use_conservative_tail_frequency_response)) {
+  for (size_t ch = 0; ch < reverb_decay_estimators_.size(); ++ch) {
+    reverb_decay_estimators_[ch] =
+        std::make_unique<ReverbDecayEstimator>(config);
+  }
+}
+
+ReverbModelEstimator::~ReverbModelEstimator() = default;
+
+void ReverbModelEstimator::Update(
+    rtc::ArrayView<const std::vector<float>> impulse_responses,
+    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
+        frequency_responses,
+    rtc::ArrayView<const absl::optional<float>> linear_filter_qualities,
+    rtc::ArrayView<const int> filter_delays_blocks,
+    const std::vector<bool>& usable_linear_estimates,
+    bool stationary_block) {
+  const size_t num_capture_channels = reverb_decay_estimators_.size();
+  RTC_DCHECK_EQ(num_capture_channels, impulse_responses.size());
+  RTC_DCHECK_EQ(num_capture_channels, frequency_responses.size());
+  RTC_DCHECK_EQ(num_capture_channels, usable_linear_estimates.size());
+
+  for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+    // Estimate the frequency response for the reverb.
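+    // Each capture channel keeps its own frequency response and decay
+    // estimator, although only channel 0 is currently read out (see the
+    // TODOs in reverb_model_estimator.h).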
+ reverb_frequency_responses_[ch].Update( + frequency_responses[ch], filter_delays_blocks[ch], + linear_filter_qualities[ch], stationary_block); + + // Estimate the reverb decay, + reverb_decay_estimators_[ch]->Update( + impulse_responses[ch], linear_filter_qualities[ch], + filter_delays_blocks[ch], usable_linear_estimates[ch], + stationary_block); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.h new file mode 100644 index 0000000000..63bade977f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_ + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" // kFftLengthBy2Plus1 +#include "modules/audio_processing/aec3/reverb_decay_estimator.h" +#include "modules/audio_processing/aec3/reverb_frequency_response.h" + +namespace webrtc { + +class ApmDataDumper; + +// Class for estimating the model parameters for the reverberant echo. +class ReverbModelEstimator { + public: + ReverbModelEstimator(const EchoCanceller3Config& config, + size_t num_capture_channels); + ~ReverbModelEstimator(); + + // Updates the estimates based on new data. + void Update( + rtc::ArrayView> impulse_responses, + rtc::ArrayView>> + frequency_responses, + rtc::ArrayView> linear_filter_qualities, + rtc::ArrayView filter_delays_blocks, + const std::vector& usable_linear_estimates, + bool stationary_block); + + // Returns the exponential decay of the reverberant echo. The parameter `mild` + // indicates which exponential decay to return, the default one or a milder + // one. + // TODO(peah): Correct to properly support multiple channels. + float ReverbDecay(bool mild) const { + return reverb_decay_estimators_[0]->Decay(mild); + } + + // Return the frequency response of the reverberant echo. + // TODO(peah): Correct to properly support multiple channels. + rtc::ArrayView GetReverbFrequencyResponse() const { + return reverb_frequency_responses_[0].FrequencyResponse(); + } + + // Dumps debug data. + void Dump(ApmDataDumper* data_dumper) const { + reverb_decay_estimators_[0]->Dump(data_dumper); + } + + private: + std::vector> reverb_decay_estimators_; + std::vector reverb_frequency_responses_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator_unittest.cc new file mode 100644 index 0000000000..fb7dcef37f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/reverb_model_estimator_unittest.cc @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/reverb_model_estimator.h" + +#include +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +EchoCanceller3Config CreateConfigForTest(float default_decay) { + EchoCanceller3Config cfg; + cfg.ep_strength.default_len = default_decay; + cfg.filter.refined.length_blocks = 40; + return cfg; +} + +constexpr int kFilterDelayBlocks = 2; + +} // namespace + +class ReverbModelEstimatorTest { + public: + ReverbModelEstimatorTest(float default_decay, size_t num_capture_channels) + : aec3_config_(CreateConfigForTest(default_decay)), + estimated_decay_(default_decay), + h_(num_capture_channels, + std::vector( + aec3_config_.filter.refined.length_blocks * kBlockSize, + 0.f)), + H2_(num_capture_channels, + std::vector>( + aec3_config_.filter.refined.length_blocks)), + quality_linear_(num_capture_channels, 1.0f) { + CreateImpulseResponseWithDecay(); + } + void RunEstimator(); + float GetDecay(bool mild) { + return mild ? mild_estimated_decay_ : estimated_decay_; + } + float GetTrueDecay() { return kTruePowerDecay; } + float GetPowerTailDb() { return 10.f * std::log10(estimated_power_tail_); } + float GetTruePowerTailDb() { return 10.f * std::log10(true_power_tail_); } + + private: + void CreateImpulseResponseWithDecay(); + static constexpr bool kStationaryBlock = false; + static constexpr float kTruePowerDecay = 0.5f; + const EchoCanceller3Config aec3_config_; + float estimated_decay_; + float mild_estimated_decay_; + float estimated_power_tail_ = 0.f; + float true_power_tail_ = 0.f; + std::vector> h_; + std::vector>> H2_; + std::vector> quality_linear_; +}; + +void ReverbModelEstimatorTest::CreateImpulseResponseWithDecay() { + const Aec3Fft fft; + for (const auto& h_k : h_) { + RTC_DCHECK_EQ(h_k.size(), + aec3_config_.filter.refined.length_blocks * kBlockSize); + } + for (const auto& H2_k : H2_) { + RTC_DCHECK_EQ(H2_k.size(), aec3_config_.filter.refined.length_blocks); + } + RTC_DCHECK_EQ(kFilterDelayBlocks, 2); + + float decay_sample = std::sqrt(powf(kTruePowerDecay, 1.f / kBlockSize)); + const size_t filter_delay_coefficients = kFilterDelayBlocks * kBlockSize; + for (auto& h_i : h_) { + std::fill(h_i.begin(), h_i.end(), 0.f); + h_i[filter_delay_coefficients] = 1.f; + for (size_t k = filter_delay_coefficients + 1; k < h_i.size(); ++k) { + h_i[k] = h_i[k - 1] * decay_sample; + } + } + + for (size_t ch = 0; ch < H2_.size(); ++ch) { + for (size_t j = 0, k = 0; j < H2_[ch].size(); ++j, k += kBlockSize) { + std::array fft_data; + fft_data.fill(0.f); + std::copy(h_[ch].begin() + k, h_[ch].begin() + k + kBlockSize, + fft_data.begin()); + FftData H_j; + fft.Fft(&fft_data, &H_j); + H_j.Spectrum(Aec3Optimization::kNone, H2_[ch][j]); + } + } + rtc::ArrayView H2_tail(H2_[0][H2_[0].size() - 1]); + true_power_tail_ = std::accumulate(H2_tail.begin(), H2_tail.end(), 0.f); +} +void 
ReverbModelEstimatorTest::RunEstimator() { + const size_t num_capture_channels = H2_.size(); + constexpr bool kUsableLinearEstimate = true; + ReverbModelEstimator estimator(aec3_config_, num_capture_channels); + std::vector usable_linear_estimates(num_capture_channels, + kUsableLinearEstimate); + std::vector filter_delay_blocks(num_capture_channels, + kFilterDelayBlocks); + for (size_t k = 0; k < 3000; ++k) { + estimator.Update(h_, H2_, quality_linear_, filter_delay_blocks, + usable_linear_estimates, kStationaryBlock); + } + estimated_decay_ = estimator.ReverbDecay(/*mild=*/false); + mild_estimated_decay_ = estimator.ReverbDecay(/*mild=*/true); + auto freq_resp_tail = estimator.GetReverbFrequencyResponse(); + estimated_power_tail_ = + std::accumulate(freq_resp_tail.begin(), freq_resp_tail.end(), 0.f); +} + +TEST(ReverbModelEstimatorTests, NotChangingDecay) { + constexpr float kDefaultDecay = 0.9f; + for (size_t num_capture_channels : {1, 2, 4, 8}) { + ReverbModelEstimatorTest test(kDefaultDecay, num_capture_channels); + test.RunEstimator(); + EXPECT_EQ(test.GetDecay(/*mild=*/false), kDefaultDecay); + EXPECT_EQ(test.GetDecay(/*mild=*/true), + EchoCanceller3Config().ep_strength.nearend_len); + EXPECT_NEAR(test.GetPowerTailDb(), test.GetTruePowerTailDb(), 5.f); + } +} + +TEST(ReverbModelEstimatorTests, ChangingDecay) { + constexpr float kDefaultDecay = -0.9f; + for (size_t num_capture_channels : {1, 2, 4, 8}) { + ReverbModelEstimatorTest test(kDefaultDecay, num_capture_channels); + test.RunEstimator(); + EXPECT_NEAR(test.GetDecay(/*mild=*/false), test.GetTrueDecay(), 0.1f); + EXPECT_NEAR(test.GetDecay(/*mild=*/true), test.GetTrueDecay(), 0.1f); + EXPECT_NEAR(test.GetPowerTailDb(), test.GetTruePowerTailDb(), 5.f); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc new file mode 100644 index 0000000000..a5e77092a6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc @@ -0,0 +1,416 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" + +#include +#include +#include + +#include "modules/audio_processing/aec3/spectrum_buffer.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace { + +constexpr std::array + kBandBoundaries = {1, 8, 16, 24, 32, 48, kFftLengthBy2Plus1}; + +std::array FormSubbandMap() { + std::array map_band_to_subband; + size_t subband = 1; + for (size_t k = 0; k < map_band_to_subband.size(); ++k) { + RTC_DCHECK_LT(subband, kBandBoundaries.size()); + if (k >= kBandBoundaries[subband]) { + subband++; + RTC_DCHECK_LT(k, kBandBoundaries[subband]); + } + map_band_to_subband[k] = subband - 1; + } + return map_band_to_subband; +} + +// Defines the size in blocks of the sections that are used for dividing the +// linear filter. 
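linear filter.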
The sections are split in a non-linear manner so that lower +// sections that typically represent the direct path have a larger resolution +// than the higher sections which typically represent more reverberant acoustic +// paths. +std::vector DefineFilterSectionSizes(size_t delay_headroom_blocks, + size_t num_blocks, + size_t num_sections) { + size_t filter_length_blocks = num_blocks - delay_headroom_blocks; + std::vector section_sizes(num_sections); + size_t remaining_blocks = filter_length_blocks; + size_t remaining_sections = num_sections; + size_t estimator_size = 2; + size_t idx = 0; + while (remaining_sections > 1 && + remaining_blocks > estimator_size * remaining_sections) { + RTC_DCHECK_LT(idx, section_sizes.size()); + section_sizes[idx] = estimator_size; + remaining_blocks -= estimator_size; + remaining_sections--; + estimator_size *= 2; + idx++; + } + + size_t last_groups_size = remaining_blocks / remaining_sections; + for (; idx < num_sections; idx++) { + section_sizes[idx] = last_groups_size; + } + section_sizes[num_sections - 1] += + remaining_blocks - last_groups_size * remaining_sections; + return section_sizes; +} + +// Forms the limits in blocks for each filter section. Those sections +// are used for analyzing the echo estimates and investigating which +// linear filter sections contribute most to the echo estimate energy. +std::vector SetSectionsBoundaries(size_t delay_headroom_blocks, + size_t num_blocks, + size_t num_sections) { + std::vector estimator_boundaries_blocks(num_sections + 1); + if (estimator_boundaries_blocks.size() == 2) { + estimator_boundaries_blocks[0] = 0; + estimator_boundaries_blocks[1] = num_blocks; + return estimator_boundaries_blocks; + } + RTC_DCHECK_GT(estimator_boundaries_blocks.size(), 2); + const std::vector section_sizes = + DefineFilterSectionSizes(delay_headroom_blocks, num_blocks, + estimator_boundaries_blocks.size() - 1); + + size_t idx = 0; + size_t current_size_block = 0; + RTC_DCHECK_EQ(section_sizes.size() + 1, estimator_boundaries_blocks.size()); + estimator_boundaries_blocks[0] = delay_headroom_blocks; + for (size_t k = delay_headroom_blocks; k < num_blocks; ++k) { + current_size_block++; + if (current_size_block >= section_sizes[idx]) { + idx = idx + 1; + if (idx == section_sizes.size()) { + break; + } + estimator_boundaries_blocks[idx] = k + 1; + current_size_block = 0; + } + } + estimator_boundaries_blocks[section_sizes.size()] = num_blocks; + return estimator_boundaries_blocks; +} + +std::array +SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) { + std::array max_erle; + std::fill(max_erle.begin(), max_erle.begin() + limit_subband_l, max_erle_l); + std::fill(max_erle.begin() + limit_subband_l, max_erle.end(), max_erle_h); + return max_erle; +} + +} // namespace + +SignalDependentErleEstimator::SignalDependentErleEstimator( + const EchoCanceller3Config& config, + size_t num_capture_channels) + : min_erle_(config.erle.min), + num_sections_(config.erle.num_sections), + num_blocks_(config.filter.refined.length_blocks), + delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize), + band_to_subband_(FormSubbandMap()), + max_erle_(SetMaxErleSubbands(config.erle.max_l, + config.erle.max_h, + band_to_subband_[kFftLengthBy2 / 2])), + section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_, + num_blocks_, + num_sections_)), + use_onset_detection_(config.erle.onset_detection), + erle_(num_capture_channels), + erle_onset_compensated_(num_capture_channels), + S2_section_accum_( + 
num_capture_channels, + std::vector>(num_sections_)), + erle_estimators_( + num_capture_channels, + std::vector>(num_sections_)), + erle_ref_(num_capture_channels), + correction_factors_( + num_capture_channels, + std::vector>(num_sections_)), + num_updates_(num_capture_channels), + n_active_sections_(num_capture_channels) { + RTC_DCHECK_LE(num_sections_, num_blocks_); + RTC_DCHECK_GE(num_sections_, 1); + Reset(); +} + +SignalDependentErleEstimator::~SignalDependentErleEstimator() = default; + +void SignalDependentErleEstimator::Reset() { + for (size_t ch = 0; ch < erle_.size(); ++ch) { + erle_[ch].fill(min_erle_); + erle_onset_compensated_[ch].fill(min_erle_); + for (auto& erle_estimator : erle_estimators_[ch]) { + erle_estimator.fill(min_erle_); + } + erle_ref_[ch].fill(min_erle_); + for (auto& factor : correction_factors_[ch]) { + factor.fill(1.0f); + } + num_updates_[ch].fill(0); + n_active_sections_[ch].fill(0); + } +} + +// Updates the Erle estimate by analyzing the current input signals. It takes +// the render buffer and the filter frequency response in order to do an +// estimation of the number of sections of the linear filter that are needed +// for getting the majority of the energy in the echo estimate. Based on that +// number of sections, it updates the erle estimation by introducing a +// correction factor to the erle that is given as an input to this method. +void SignalDependentErleEstimator::Update( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses, + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + rtc::ArrayView> average_erle, + rtc::ArrayView> + average_erle_onset_compensated, + const std::vector& converged_filters) { + RTC_DCHECK_GT(num_sections_, 1); + + // Gets the number of filter sections that are needed for achieving 90 % + // of the power spectrum energy of the echo estimate. + ComputeNumberOfActiveFilterSections(render_buffer, + filter_frequency_responses); + + // Updates the correction factors that is used for correcting the erle and + // adapt it to the particular characteristics of the input signal. + UpdateCorrectionFactors(X2, Y2, E2, converged_filters); + + // Applies the correction factor to the input erle for getting a more refined + // erle estimation for the current input signal. + for (size_t ch = 0; ch < erle_.size(); ++ch) { + for (size_t k = 0; k < kFftLengthBy2; ++k) { + RTC_DCHECK_GT(correction_factors_[ch].size(), n_active_sections_[ch][k]); + float correction_factor = + correction_factors_[ch][n_active_sections_[ch][k]] + [band_to_subband_[k]]; + erle_[ch][k] = rtc::SafeClamp(average_erle[ch][k] * correction_factor, + min_erle_, max_erle_[band_to_subband_[k]]); + if (use_onset_detection_) { + erle_onset_compensated_[ch][k] = rtc::SafeClamp( + average_erle_onset_compensated[ch][k] * correction_factor, + min_erle_, max_erle_[band_to_subband_[k]]); + } + } + } +} + +void SignalDependentErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + for (auto& erle : erle_estimators_[0]) { + data_dumper->DumpRaw("aec3_all_erle", erle); + } + data_dumper->DumpRaw("aec3_ref_erle", erle_ref_[0]); + for (auto& factor : correction_factors_[0]) { + data_dumper->DumpRaw("aec3_erle_correction_factor", factor); + } +} + +// Estimates for each band the smallest number of sections in the filter that +// together constitute 90% of the estimated echo energy. 
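+// As a reading aid: per band, the routine below together with
+// ComputeActiveFilterSections amounts to finding the first prefix of filter
+// sections whose cumulative echo energy reaches 90% of the full-filter
+// energy. The helper is an illustrative sketch only (not used by the
+// estimator); `cumulative_energy` stands for one band of S2_section_accum_
+// after the prefix summation, so it is non-decreasing.
+inline size_t FirstSectionReaching90Percent(
+    rtc::ArrayView<const float> cumulative_energy) {
+  RTC_DCHECK(!cumulative_energy.empty());
+  const float target = 0.9f * cumulative_energy[cumulative_energy.size() - 1];
+  size_t section = 0;
+  // Walk up until the prefix energy first meets the target; under the reading
+  // above this index is what n_active_sections_ ends up holding for the band.
+  while (section + 1 < cumulative_energy.size() &&
+         cumulative_energy[section] < target) {
+    ++section;
+  }
+  return section;
+}
+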
+void SignalDependentErleEstimator::ComputeNumberOfActiveFilterSections( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses) { + RTC_DCHECK_GT(num_sections_, 1); + // Computes an approximation of the power spectrum if the filter would have + // been limited to a certain number of filter sections. + ComputeEchoEstimatePerFilterSection(render_buffer, + filter_frequency_responses); + // For each band, computes the number of filter sections that are needed for + // achieving the 90 % energy in the echo estimate. + ComputeActiveFilterSections(); +} + +void SignalDependentErleEstimator::UpdateCorrectionFactors( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters) { + for (size_t ch = 0; ch < converged_filters.size(); ++ch) { + if (converged_filters[ch]) { + constexpr float kX2BandEnergyThreshold = 44015068.0f; + constexpr float kSmthConstantDecreases = 0.1f; + constexpr float kSmthConstantIncreases = kSmthConstantDecreases / 2.f; + auto subband_powers = [](rtc::ArrayView power_spectrum, + rtc::ArrayView power_spectrum_subbands) { + for (size_t subband = 0; subband < kSubbands; ++subband) { + RTC_DCHECK_LE(kBandBoundaries[subband + 1], power_spectrum.size()); + power_spectrum_subbands[subband] = std::accumulate( + power_spectrum.begin() + kBandBoundaries[subband], + power_spectrum.begin() + kBandBoundaries[subband + 1], 0.f); + } + }; + + std::array X2_subbands, E2_subbands, Y2_subbands; + subband_powers(X2, X2_subbands); + subband_powers(E2[ch], E2_subbands); + subband_powers(Y2[ch], Y2_subbands); + std::array idx_subbands; + for (size_t subband = 0; subband < kSubbands; ++subband) { + // When aggregating the number of active sections in the filter for + // different bands we choose to take the minimum of all of them. As an + // example, if for one of the bands it is the direct path its refined + // contributor to the final echo estimate, we consider the direct path + // is as well the refined contributor for the subband that contains that + // particular band. That aggregate number of sections will be later used + // as the identifier of the erle estimator that needs to be updated. + RTC_DCHECK_LE(kBandBoundaries[subband + 1], + n_active_sections_[ch].size()); + idx_subbands[subband] = *std::min_element( + n_active_sections_[ch].begin() + kBandBoundaries[subband], + n_active_sections_[ch].begin() + kBandBoundaries[subband + 1]); + } + + std::array new_erle; + std::array is_erle_updated; + is_erle_updated.fill(false); + new_erle.fill(0.f); + for (size_t subband = 0; subband < kSubbands; ++subband) { + if (X2_subbands[subband] > kX2BandEnergyThreshold && + E2_subbands[subband] > 0) { + new_erle[subband] = Y2_subbands[subband] / E2_subbands[subband]; + RTC_DCHECK_GT(new_erle[subband], 0); + is_erle_updated[subband] = true; + ++num_updates_[ch][subband]; + } + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + const size_t idx = idx_subbands[subband]; + RTC_DCHECK_LT(idx, erle_estimators_[ch].size()); + float alpha = new_erle[subband] > erle_estimators_[ch][idx][subband] + ? 
kSmthConstantIncreases + : kSmthConstantDecreases; + alpha = static_cast(is_erle_updated[subband]) * alpha; + erle_estimators_[ch][idx][subband] += + alpha * (new_erle[subband] - erle_estimators_[ch][idx][subband]); + erle_estimators_[ch][idx][subband] = rtc::SafeClamp( + erle_estimators_[ch][idx][subband], min_erle_, max_erle_[subband]); + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + float alpha = new_erle[subband] > erle_ref_[ch][subband] + ? kSmthConstantIncreases + : kSmthConstantDecreases; + alpha = static_cast(is_erle_updated[subband]) * alpha; + erle_ref_[ch][subband] += + alpha * (new_erle[subband] - erle_ref_[ch][subband]); + erle_ref_[ch][subband] = rtc::SafeClamp(erle_ref_[ch][subband], + min_erle_, max_erle_[subband]); + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + constexpr int kNumUpdateThr = 50; + if (is_erle_updated[subband] && + num_updates_[ch][subband] > kNumUpdateThr) { + const size_t idx = idx_subbands[subband]; + RTC_DCHECK_GT(erle_ref_[ch][subband], 0.f); + // Computes the ratio between the erle that is updated using all the + // points and the erle that is updated only on signals that share the + // same number of active filter sections. + float new_correction_factor = + erle_estimators_[ch][idx][subband] / erle_ref_[ch][subband]; + + correction_factors_[ch][idx][subband] += + 0.1f * + (new_correction_factor - correction_factors_[ch][idx][subband]); + } + } + } + } +} + +void SignalDependentErleEstimator::ComputeEchoEstimatePerFilterSection( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses) { + const SpectrumBuffer& spectrum_render_buffer = + render_buffer.GetSpectrumBuffer(); + const size_t num_render_channels = spectrum_render_buffer.buffer[0].size(); + const size_t num_capture_channels = S2_section_accum_.size(); + const float one_by_num_render_channels = 1.f / num_render_channels; + + RTC_DCHECK_EQ(S2_section_accum_.size(), filter_frequency_responses.size()); + + for (size_t capture_ch = 0; capture_ch < num_capture_channels; ++capture_ch) { + RTC_DCHECK_EQ(S2_section_accum_[capture_ch].size() + 1, + section_boundaries_blocks_.size()); + size_t idx_render = render_buffer.Position(); + idx_render = spectrum_render_buffer.OffsetIndex( + idx_render, section_boundaries_blocks_[0]); + + for (size_t section = 0; section < num_sections_; ++section) { + std::array X2_section; + std::array H2_section; + X2_section.fill(0.f); + H2_section.fill(0.f); + const size_t block_limit = + std::min(section_boundaries_blocks_[section + 1], + filter_frequency_responses[capture_ch].size()); + for (size_t block = section_boundaries_blocks_[section]; + block < block_limit; ++block) { + for (size_t render_ch = 0; + render_ch < spectrum_render_buffer.buffer[idx_render].size(); + ++render_ch) { + for (size_t k = 0; k < X2_section.size(); ++k) { + X2_section[k] += + spectrum_render_buffer.buffer[idx_render][render_ch][k] * + one_by_num_render_channels; + } + } + std::transform(H2_section.begin(), H2_section.end(), + filter_frequency_responses[capture_ch][block].begin(), + H2_section.begin(), std::plus()); + idx_render = spectrum_render_buffer.IncIndex(idx_render); + } + + std::transform(X2_section.begin(), X2_section.end(), H2_section.begin(), + S2_section_accum_[capture_ch][section].begin(), + std::multiplies()); + } + + for (size_t section = 1; section < num_sections_; ++section) { + std::transform(S2_section_accum_[capture_ch][section - 1].begin(), + S2_section_accum_[capture_ch][section - 1].end(), + 
S2_section_accum_[capture_ch][section].begin(), + S2_section_accum_[capture_ch][section].begin(), + std::plus()); + } + } +} + +void SignalDependentErleEstimator::ComputeActiveFilterSections() { + for (size_t ch = 0; ch < n_active_sections_.size(); ++ch) { + std::fill(n_active_sections_[ch].begin(), n_active_sections_[ch].end(), 0); + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + size_t section = num_sections_; + float target = 0.9f * S2_section_accum_[ch][num_sections_ - 1][k]; + while (section > 0 && S2_section_accum_[ch][section - 1][k] >= target) { + n_active_sections_[ch][k] = --section; + } + } + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.h new file mode 100644 index 0000000000..6847c1ab13 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ + +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// This class estimates the dependency of the Erle to the input signal. By +// looking at the input signal, an estimation on whether the current echo +// estimate is due to the direct path or to a more reverberant one is performed. +// Once that estimation is done, it is possible to refine the average Erle that +// this class receive as an input. +class SignalDependentErleEstimator { + public: + SignalDependentErleEstimator(const EchoCanceller3Config& config, + size_t num_capture_channels); + + ~SignalDependentErleEstimator(); + + void Reset(); + + // Returns the Erle per frequency subband. + rtc::ArrayView> Erle( + bool onset_compensated) const { + return onset_compensated && use_onset_detection_ ? erle_onset_compensated_ + : erle_; + } + + // Updates the Erle estimate. The Erle that is passed as an input is required + // to be an estimation of the average Erle achieved by the linear filter. 
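+  // Here X2 is the render power spectrum, while Y2 and E2 are the capture
+  // and linear-filter-error power spectra, one array per capture channel.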
+ void Update( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_response, + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + rtc::ArrayView> average_erle, + rtc::ArrayView> + average_erle_onset_compensated, + const std::vector& converged_filters); + + void Dump(const std::unique_ptr& data_dumper) const; + + static constexpr size_t kSubbands = 6; + + private: + void ComputeNumberOfActiveFilterSections( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses); + + void UpdateCorrectionFactors( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters); + + void ComputeEchoEstimatePerFilterSection( + const RenderBuffer& render_buffer, + rtc::ArrayView>> + filter_frequency_responses); + + void ComputeActiveFilterSections(); + + const float min_erle_; + const size_t num_sections_; + const size_t num_blocks_; + const size_t delay_headroom_blocks_; + const std::array band_to_subband_; + const std::array max_erle_; + const std::vector section_boundaries_blocks_; + const bool use_onset_detection_; + std::vector> erle_; + std::vector> erle_onset_compensated_; + std::vector>> + S2_section_accum_; + std::vector>> erle_estimators_; + std::vector> erle_ref_; + std::vector>> correction_factors_; + std::vector> num_updates_; + std::vector> n_active_sections_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc new file mode 100644 index 0000000000..67927a6c68 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" + +#include +#include +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +void GetActiveFrame(Block* x) { + const std::array frame = { + 7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85, + 6665.52, 14808.6, 9342.3, 7483.28, 19261.7, 4145.98, 1622.18, 13475.2, + 7166.32, 6856.61, 21937, 7263.14, 9569.07, 14919, 8413.32, 7551.89, + 7848.65, 6011.27, 13080.6, 15865.2, 12656, 17459.6, 4263.93, 4503.03, + 9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6, + 11405, 15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8, + 1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4, + 12416.2, 16434, 2454.69, 9840.8, 6867.23, 1615.75, 6059.9, 8394.19}; + for (int band = 0; band < x->NumBands(); ++band) { + for (int channel = 0; channel < x->NumChannels(); ++channel) { + RTC_DCHECK_GE(kBlockSize, frame.size()); + std::copy(frame.begin(), frame.end(), x->begin(band, channel)); + } + } +} + +class TestInputs { + public: + TestInputs(const EchoCanceller3Config& cfg, + size_t num_render_channels, + size_t num_capture_channels); + ~TestInputs(); + const RenderBuffer& GetRenderBuffer() { return *render_buffer_; } + rtc::ArrayView GetX2() { return X2_; } + rtc::ArrayView> GetY2() const { + return Y2_; + } + rtc::ArrayView> GetE2() const { + return E2_; + } + rtc::ArrayView>> + GetH2() const { + return H2_; + } + const std::vector& GetConvergedFilters() const { + return converged_filters_; + } + void Update(); + + private: + void UpdateCurrentPowerSpectra(); + int n_ = 0; + std::unique_ptr render_delay_buffer_; + RenderBuffer* render_buffer_; + std::array X2_; + std::vector> Y2_; + std::vector> E2_; + std::vector>> H2_; + Block x_; + std::vector converged_filters_; +}; + +TestInputs::TestInputs(const EchoCanceller3Config& cfg, + size_t num_render_channels, + size_t num_capture_channels) + : render_delay_buffer_( + RenderDelayBuffer::Create(cfg, 16000, num_render_channels)), + Y2_(num_capture_channels), + E2_(num_capture_channels), + H2_(num_capture_channels, + std::vector>( + cfg.filter.refined.length_blocks)), + x_(1, num_render_channels), + converged_filters_(num_capture_channels, true) { + render_delay_buffer_->AlignFromDelay(4); + render_buffer_ = render_delay_buffer_->GetRenderBuffer(); + for (auto& H2_ch : H2_) { + for (auto& H2_p : H2_ch) { + H2_p.fill(0.f); + } + } + for (auto& H2_p : H2_[0]) { + H2_p.fill(1.f); + } +} + +TestInputs::~TestInputs() = default; + +void TestInputs::Update() { + if (n_ % 2 == 0) { + std::fill(x_.begin(/*band=*/0, /*channel=*/0), + x_.end(/*band=*/0, /*channel=*/0), 0.f); + } else { + GetActiveFrame(&x_); + } + + render_delay_buffer_->Insert(x_); + render_delay_buffer_->PrepareCaptureProcessing(); + UpdateCurrentPowerSpectra(); + ++n_; +} + +void TestInputs::UpdateCurrentPowerSpectra() { + const SpectrumBuffer& spectrum_render_buffer = + render_buffer_->GetSpectrumBuffer(); + size_t idx = render_buffer_->Position(); + size_t prev_idx = spectrum_render_buffer.OffsetIndex(idx, 1); + auto& X2 = spectrum_render_buffer.buffer[idx][/*channel=*/0]; + auto& X2_prev = spectrum_render_buffer.buffer[prev_idx][/*channel=*/0]; + std::copy(X2.begin(), X2.end(), X2_.begin()); + for (size_t ch = 0; ch < 
Y2_.size(); ++ch) { + RTC_DCHECK_EQ(X2.size(), Y2_[ch].size()); + for (size_t k = 0; k < X2.size(); ++k) { + E2_[ch][k] = 0.01f * X2_prev[k]; + Y2_[ch][k] = X2[k] + E2_[ch][k]; + } + } +} + +} // namespace + +class SignalDependentErleEstimatorMultiChannel + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P(MultiChannel, + SignalDependentErleEstimatorMultiChannel, + ::testing::Combine(::testing::Values(1, 2, 4), + ::testing::Values(1, 2, 4))); + +TEST_P(SignalDependentErleEstimatorMultiChannel, SweepSettings) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + EchoCanceller3Config cfg; + size_t max_length_blocks = 50; + for (size_t blocks = 1; blocks < max_length_blocks; blocks = blocks + 10) { + for (size_t delay_headroom = 0; delay_headroom < 5; ++delay_headroom) { + for (size_t num_sections = 2; num_sections < max_length_blocks; + ++num_sections) { + cfg.filter.refined.length_blocks = blocks; + cfg.filter.refined_initial.length_blocks = + std::min(cfg.filter.refined_initial.length_blocks, blocks); + cfg.delay.delay_headroom_samples = delay_headroom * kBlockSize; + cfg.erle.num_sections = num_sections; + if (EchoCanceller3Config::Validate(&cfg)) { + SignalDependentErleEstimator s(cfg, num_capture_channels); + std::vector> average_erle( + num_capture_channels); + for (auto& e : average_erle) { + e.fill(cfg.erle.max_l); + } + TestInputs inputs(cfg, num_render_channels, num_capture_channels); + for (size_t n = 0; n < 10; ++n) { + inputs.Update(); + s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), + inputs.GetY2(), inputs.GetE2(), average_erle, average_erle, + inputs.GetConvergedFilters()); + } + } + } + } + } +} + +TEST_P(SignalDependentErleEstimatorMultiChannel, LongerRun) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + EchoCanceller3Config cfg; + cfg.filter.refined.length_blocks = 2; + cfg.filter.refined_initial.length_blocks = 1; + cfg.delay.delay_headroom_samples = 0; + cfg.delay.hysteresis_limit_blocks = 0; + cfg.erle.num_sections = 2; + EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true); + std::vector> average_erle( + num_capture_channels); + for (auto& e : average_erle) { + e.fill(cfg.erle.max_l); + } + SignalDependentErleEstimator s(cfg, num_capture_channels); + TestInputs inputs(cfg, num_render_channels, num_capture_channels); + for (size_t n = 0; n < 200; ++n) { + inputs.Update(); + s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), + inputs.GetY2(), inputs.GetE2(), average_erle, average_erle, + inputs.GetConvergedFilters()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc new file mode 100644 index 0000000000..fe32ece09c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.cc @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/spectrum_buffer.h" + +#include + +namespace webrtc { + +SpectrumBuffer::SpectrumBuffer(size_t size, size_t num_channels) + : size(static_cast(size)), + buffer(size, + std::vector>(num_channels)) { + for (auto& channel : buffer) { + for (auto& c : channel) { + std::fill(c.begin(), c.end(), 0.f); + } + } +} + +SpectrumBuffer::~SpectrumBuffer() = default; + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.h b/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.h new file mode 100644 index 0000000000..51e1317f55 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/spectrum_buffer.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SPECTRUM_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SPECTRUM_BUFFER_H_ + +#include + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// Struct for bundling a circular buffer of one dimensional vector objects +// together with the read and write indices. +struct SpectrumBuffer { + SpectrumBuffer(size_t size, size_t num_channels); + ~SpectrumBuffer(); + + int IncIndex(int index) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return index < size - 1 ? index + 1 : 0; + } + + int DecIndex(int index) const { + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + return index > 0 ? index - 1 : size - 1; + } + + int OffsetIndex(int index, int offset) const { + RTC_DCHECK_GE(size, offset); + RTC_DCHECK_EQ(buffer.size(), static_cast(size)); + RTC_DCHECK_GE(size + index + offset, 0); + return (size + index + offset) % size; + } + + void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); } + void IncWriteIndex() { write = IncIndex(write); } + void DecWriteIndex() { write = DecIndex(write); } + void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); } + void IncReadIndex() { read = IncIndex(read); } + void DecReadIndex() { read = DecIndex(read); } + + const int size; + std::vector>> buffer; + int write = 0; + int read = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SPECTRUM_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc new file mode 100644 index 0000000000..4d364041b3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.cc @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/stationarity_estimator.h"
+
+#include <algorithm>
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/spectrum_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+
+namespace {
+constexpr float kMinNoisePower = 10.f;
+constexpr int kHangoverBlocks = kNumBlocksPerSecond / 20;
+constexpr int kNBlocksAverageInitPhase = 20;
+constexpr int kNBlocksInitialPhase = kNumBlocksPerSecond * 2.;
+}  // namespace
+
+StationarityEstimator::StationarityEstimator()
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)) {
+  Reset();
+}
+
+StationarityEstimator::~StationarityEstimator() = default;
+
+void StationarityEstimator::Reset() {
+  noise_.Reset();
+  hangovers_.fill(0);
+  stationarity_flags_.fill(false);
+}
+
+// Update just the noise estimator. Useful until the delay is known.
+void StationarityEstimator::UpdateNoiseEstimator(
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> spectrum) {
+  noise_.Update(spectrum);
+  data_dumper_->DumpRaw("aec3_stationarity_noise_spectrum", noise_.Spectrum());
+  data_dumper_->DumpRaw("aec3_stationarity_is_block_stationary",
+                        IsBlockStationary());
+}
+
+void StationarityEstimator::UpdateStationarityFlags(
+    const SpectrumBuffer& spectrum_buffer,
+    rtc::ArrayView<const float> render_reverb_contribution_spectrum,
+    int idx_current,
+    int num_lookahead) {
+  std::array<int, kWindowLength> indexes;
+  int num_lookahead_bounded = std::min(num_lookahead, kWindowLength - 1);
+  int idx = idx_current;
+
+  if (num_lookahead_bounded < kWindowLength - 1) {
+    int num_lookback = (kWindowLength - 1) - num_lookahead_bounded;
+    idx = spectrum_buffer.OffsetIndex(idx_current, num_lookback);
+  }
+  // For estimating the stationarity properties of the current frame, the
+  // power for each band is accumulated for several consecutive spectra in
+  // the method EstimateBandStationarity.
+  // In order to avoid getting the indexes of the spectra for every band with
+  // its associated overhead, those indexes are stored in an array and then
+  // used when the estimation is done.
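+  // The window always spans kWindowLength consecutive spectra: up to
+  // num_lookahead_bounded of them ahead of the current position and the
+  // remainder behind it, addressed with SpectrumBuffer's circular index
+  // arithmetic ((size + index + offset) % size).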
+ indexes[0] = idx; + for (size_t k = 1; k < indexes.size(); ++k) { + indexes[k] = spectrum_buffer.DecIndex(indexes[k - 1]); + } + RTC_DCHECK_EQ( + spectrum_buffer.DecIndex(indexes[kWindowLength - 1]), + spectrum_buffer.OffsetIndex(idx_current, -(num_lookahead_bounded + 1))); + + for (size_t k = 0; k < stationarity_flags_.size(); ++k) { + stationarity_flags_[k] = EstimateBandStationarity( + spectrum_buffer, render_reverb_contribution_spectrum, indexes, k); + } + UpdateHangover(); + SmoothStationaryPerFreq(); +} + +bool StationarityEstimator::IsBlockStationary() const { + float acum_stationarity = 0.f; + RTC_DCHECK_EQ(stationarity_flags_.size(), kFftLengthBy2Plus1); + for (size_t band = 0; band < stationarity_flags_.size(); ++band) { + bool st = IsBandStationary(band); + acum_stationarity += static_cast(st); + } + return ((acum_stationarity * (1.f / kFftLengthBy2Plus1)) > 0.75f); +} + +bool StationarityEstimator::EstimateBandStationarity( + const SpectrumBuffer& spectrum_buffer, + rtc::ArrayView average_reverb, + const std::array& indexes, + size_t band) const { + constexpr float kThrStationarity = 10.f; + float acum_power = 0.f; + const int num_render_channels = + static_cast(spectrum_buffer.buffer[0].size()); + const float one_by_num_channels = 1.f / num_render_channels; + for (auto idx : indexes) { + for (int ch = 0; ch < num_render_channels; ++ch) { + acum_power += spectrum_buffer.buffer[idx][ch][band] * one_by_num_channels; + } + } + acum_power += average_reverb[band]; + float noise = kWindowLength * GetStationarityPowerBand(band); + RTC_CHECK_LT(0.f, noise); + bool stationary = acum_power < kThrStationarity * noise; + data_dumper_->DumpRaw("aec3_stationarity_long_ratio", acum_power / noise); + return stationary; +} + +bool StationarityEstimator::AreAllBandsStationary() { + for (auto b : stationarity_flags_) { + if (!b) + return false; + } + return true; +} + +void StationarityEstimator::UpdateHangover() { + bool reduce_hangover = AreAllBandsStationary(); + for (size_t k = 0; k < stationarity_flags_.size(); ++k) { + if (!stationarity_flags_[k]) { + hangovers_[k] = kHangoverBlocks; + } else if (reduce_hangover) { + hangovers_[k] = std::max(hangovers_[k] - 1, 0); + } + } +} + +void StationarityEstimator::SmoothStationaryPerFreq() { + std::array all_ahead_stationary_smooth; + for (size_t k = 1; k < kFftLengthBy2Plus1 - 1; ++k) { + all_ahead_stationary_smooth[k] = stationarity_flags_[k - 1] && + stationarity_flags_[k] && + stationarity_flags_[k + 1]; + } + + all_ahead_stationary_smooth[0] = all_ahead_stationary_smooth[1]; + all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 1] = + all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 2]; + + stationarity_flags_ = all_ahead_stationary_smooth; +} + +std::atomic StationarityEstimator::instance_count_(0); + +StationarityEstimator::NoiseSpectrum::NoiseSpectrum() { + Reset(); +} + +StationarityEstimator::NoiseSpectrum::~NoiseSpectrum() = default; + +void StationarityEstimator::NoiseSpectrum::Reset() { + block_counter_ = 0; + noise_spectrum_.fill(kMinNoisePower); +} + +void StationarityEstimator::NoiseSpectrum::Update( + rtc::ArrayView> spectrum) { + RTC_DCHECK_LE(1, spectrum[0].size()); + const int num_render_channels = static_cast(spectrum.size()); + + std::array avg_spectrum_data; + rtc::ArrayView avg_spectrum; + if (num_render_channels == 1) { + avg_spectrum = spectrum[0]; + } else { + // For multiple channels, average the channel spectra before passing to the + // noise spectrum estimator. 
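+    // Band 0 keeps channel 0's value: the accumulation and scaling loops
+    // below start at k = 1.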
+ avg_spectrum = avg_spectrum_data; + std::copy(spectrum[0].begin(), spectrum[0].end(), + avg_spectrum_data.begin()); + for (int ch = 1; ch < num_render_channels; ++ch) { + for (size_t k = 1; k < kFftLengthBy2Plus1; ++k) { + avg_spectrum_data[k] += spectrum[ch][k]; + } + } + + const float one_by_num_channels = 1.f / num_render_channels; + for (size_t k = 1; k < kFftLengthBy2Plus1; ++k) { + avg_spectrum_data[k] *= one_by_num_channels; + } + } + + ++block_counter_; + float alpha = GetAlpha(); + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (block_counter_ <= kNBlocksAverageInitPhase) { + noise_spectrum_[k] += (1.f / kNBlocksAverageInitPhase) * avg_spectrum[k]; + } else { + noise_spectrum_[k] = + UpdateBandBySmoothing(avg_spectrum[k], noise_spectrum_[k], alpha); + } + } +} + +float StationarityEstimator::NoiseSpectrum::GetAlpha() const { + constexpr float kAlpha = 0.004f; + constexpr float kAlphaInit = 0.04f; + constexpr float kTiltAlpha = (kAlphaInit - kAlpha) / kNBlocksInitialPhase; + + if (block_counter_ > (kNBlocksInitialPhase + kNBlocksAverageInitPhase)) { + return kAlpha; + } else { + return kAlphaInit - + kTiltAlpha * (block_counter_ - kNBlocksAverageInitPhase); + } +} + +float StationarityEstimator::NoiseSpectrum::UpdateBandBySmoothing( + float power_band, + float power_band_noise, + float alpha) const { + float power_band_noise_updated = power_band_noise; + if (power_band_noise < power_band) { + RTC_DCHECK_GT(power_band, 0.f); + float alpha_inc = alpha * (power_band_noise / power_band); + if (block_counter_ > kNBlocksInitialPhase) { + if (10.f * power_band_noise < power_band) { + alpha_inc *= 0.1f; + } + } + power_band_noise_updated += alpha_inc * (power_band - power_band_noise); + } else { + power_band_noise_updated += alpha * (power_band - power_band_noise); + power_band_noise_updated = + std::max(power_band_noise_updated, kMinNoisePower); + } + return power_band_noise_updated; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.h new file mode 100644 index 0000000000..8bcd3b789e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/stationarity_estimator.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_STATIONARITY_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_STATIONARITY_ESTIMATOR_H_ + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" // kFftLengthBy2Plus1... +#include "modules/audio_processing/aec3/reverb_model.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +class ApmDataDumper; +struct SpectrumBuffer; + +class StationarityEstimator { + public: + StationarityEstimator(); + ~StationarityEstimator(); + + // Reset the stationarity estimator. + void Reset(); + + // Update just the noise estimator. Usefull until the delay is known + void UpdateNoiseEstimator( + rtc::ArrayView> spectrum); + + // Update the flag indicating whether this current frame is stationary. 
For + // getting a more robust estimation, it looks at future and/or past frames. + void UpdateStationarityFlags( + const SpectrumBuffer& spectrum_buffer, + rtc::ArrayView render_reverb_contribution_spectrum, + int idx_current, + int num_lookahead); + + // Returns true if the current band is stationary. + bool IsBandStationary(size_t band) const { + return stationarity_flags_[band] && (hangovers_[band] == 0); + } + + // Returns true if the current block is estimated as stationary. + bool IsBlockStationary() const; + + private: + static constexpr int kWindowLength = 13; + // Returns the power of the stationary noise spectrum at a band. + float GetStationarityPowerBand(size_t k) const { return noise_.Power(k); } + + // Get an estimation of the stationarity for the current band by looking + // at the past/present/future available data. + bool EstimateBandStationarity(const SpectrumBuffer& spectrum_buffer, + rtc::ArrayView average_reverb, + const std::array& indexes, + size_t band) const; + + // True if all bands at the current point are stationary. + bool AreAllBandsStationary(); + + // Update the hangover depending on the stationary status of the current + // frame. + void UpdateHangover(); + + // Smooth the stationarity detection by looking at neighbouring frequency + // bands. + void SmoothStationaryPerFreq(); + + class NoiseSpectrum { + public: + NoiseSpectrum(); + ~NoiseSpectrum(); + + // Reset the noise power spectrum estimate state. + void Reset(); + + // Update the noise power spectrum with a new frame. + void Update( + rtc::ArrayView> spectrum); + + // Get the noise estimation power spectrum. + rtc::ArrayView Spectrum() const { return noise_spectrum_; } + + // Get the noise power spectrum at a certain band. + float Power(size_t band) const { + RTC_DCHECK_LT(band, noise_spectrum_.size()); + return noise_spectrum_[band]; + } + + private: + // Get the update coefficient to be used for the current frame. + float GetAlpha() const; + + // Update the noise power spectrum at a certain band with a new frame. + float UpdateBandBySmoothing(float power_band, + float power_band_noise, + float alpha) const; + std::array noise_spectrum_; + size_t block_counter_; + }; + + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + NoiseSpectrum noise_; + std::array hangovers_; + std::array stationarity_flags_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_STATIONARITY_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc new file mode 100644 index 0000000000..dc7f92fd99 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.cc @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/subband_erle_estimator.h" + +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +constexpr float kX2BandEnergyThreshold = 44015068.0f; +constexpr int kBlocksToHoldErle = 100; +constexpr int kBlocksForOnsetDetection = kBlocksToHoldErle + 150; +constexpr int kPointsToAccumulate = 6; + +std::array SetMaxErleBands(float max_erle_l, + float max_erle_h) { + std::array max_erle; + std::fill(max_erle.begin(), max_erle.begin() + kFftLengthBy2 / 2, max_erle_l); + std::fill(max_erle.begin() + kFftLengthBy2 / 2, max_erle.end(), max_erle_h); + return max_erle; +} + +bool EnableMinErleDuringOnsets() { + return !field_trial::IsEnabled("WebRTC-Aec3MinErleDuringOnsetsKillSwitch"); +} + +} // namespace + +SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config, + size_t num_capture_channels) + : use_onset_detection_(config.erle.onset_detection), + min_erle_(config.erle.min), + max_erle_(SetMaxErleBands(config.erle.max_l, config.erle.max_h)), + use_min_erle_during_onsets_(EnableMinErleDuringOnsets()), + accum_spectra_(num_capture_channels), + erle_(num_capture_channels), + erle_onset_compensated_(num_capture_channels), + erle_unbounded_(num_capture_channels), + erle_during_onsets_(num_capture_channels), + coming_onset_(num_capture_channels), + hold_counters_(num_capture_channels) { + Reset(); +} + +SubbandErleEstimator::~SubbandErleEstimator() = default; + +void SubbandErleEstimator::Reset() { + const size_t num_capture_channels = erle_.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + erle_[ch].fill(min_erle_); + erle_onset_compensated_[ch].fill(min_erle_); + erle_unbounded_[ch].fill(min_erle_); + erle_during_onsets_[ch].fill(min_erle_); + coming_onset_[ch].fill(true); + hold_counters_[ch].fill(0); + } + ResetAccumulatedSpectra(); +} + +void SubbandErleEstimator::Update( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters) { + UpdateAccumulatedSpectra(X2, Y2, E2, converged_filters); + UpdateBands(converged_filters); + + if (use_onset_detection_) { + DecreaseErlePerBandForLowRenderSignals(); + } + + const size_t num_capture_channels = erle_.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + auto& erle = erle_[ch]; + erle[0] = erle[1]; + erle[kFftLengthBy2] = erle[kFftLengthBy2 - 1]; + + auto& erle_oc = erle_onset_compensated_[ch]; + erle_oc[0] = erle_oc[1]; + erle_oc[kFftLengthBy2] = erle_oc[kFftLengthBy2 - 1]; + + auto& erle_u = erle_unbounded_[ch]; + erle_u[0] = erle_u[1]; + erle_u[kFftLengthBy2] = erle_u[kFftLengthBy2 - 1]; + } +} + +void SubbandErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + data_dumper->DumpRaw("aec3_erle_onset", ErleDuringOnsets()[0]); +} + +void SubbandErleEstimator::UpdateBands( + const std::vector& converged_filters) { + const int num_capture_channels = static_cast(accum_spectra_.Y2.size()); + for (int ch = 0; ch < num_capture_channels; ++ch) { + // Note that the use of the converged_filter flag already imposed + // a minimum of the erle that can be estimated as that flag would + // be false if the filter is performing poorly. 
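+    // Hence channels without a converged filter are skipped outright and
+    // their ERLE keeps its previous value.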
+ if (!converged_filters[ch]) { + continue; + } + + if (accum_spectra_.num_points[ch] != kPointsToAccumulate) { + continue; + } + + std::array new_erle; + std::array is_erle_updated; + is_erle_updated.fill(false); + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (accum_spectra_.E2[ch][k] > 0.f) { + new_erle[k] = accum_spectra_.Y2[ch][k] / accum_spectra_.E2[ch][k]; + is_erle_updated[k] = true; + } + } + + if (use_onset_detection_) { + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (is_erle_updated[k] && !accum_spectra_.low_render_energy[ch][k]) { + if (coming_onset_[ch][k]) { + coming_onset_[ch][k] = false; + if (!use_min_erle_during_onsets_) { + float alpha = + new_erle[k] < erle_during_onsets_[ch][k] ? 0.3f : 0.15f; + erle_during_onsets_[ch][k] = rtc::SafeClamp( + erle_during_onsets_[ch][k] + + alpha * (new_erle[k] - erle_during_onsets_[ch][k]), + min_erle_, max_erle_[k]); + } + } + hold_counters_[ch][k] = kBlocksForOnsetDetection; + } + } + } + + auto update_erle_band = [](float& erle, float new_erle, + bool low_render_energy, float min_erle, + float max_erle) { + float alpha = 0.05f; + if (new_erle < erle) { + alpha = low_render_energy ? 0.f : 0.1f; + } + erle = + rtc::SafeClamp(erle + alpha * (new_erle - erle), min_erle, max_erle); + }; + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (is_erle_updated[k]) { + const bool low_render_energy = accum_spectra_.low_render_energy[ch][k]; + update_erle_band(erle_[ch][k], new_erle[k], low_render_energy, + min_erle_, max_erle_[k]); + if (use_onset_detection_) { + update_erle_band(erle_onset_compensated_[ch][k], new_erle[k], + low_render_energy, min_erle_, max_erle_[k]); + } + + // Virtually unbounded ERLE. + constexpr float kUnboundedErleMax = 100000.0f; + update_erle_band(erle_unbounded_[ch][k], new_erle[k], low_render_energy, + min_erle_, kUnboundedErleMax); + } + } + } +} + +void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { + const int num_capture_channels = static_cast(accum_spectra_.Y2.size()); + for (int ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 1; k < kFftLengthBy2; ++k) { + --hold_counters_[ch][k]; + if (hold_counters_[ch][k] <= + (kBlocksForOnsetDetection - kBlocksToHoldErle)) { + if (erle_onset_compensated_[ch][k] > erle_during_onsets_[ch][k]) { + erle_onset_compensated_[ch][k] = + std::max(erle_during_onsets_[ch][k], + 0.97f * erle_onset_compensated_[ch][k]); + RTC_DCHECK_LE(min_erle_, erle_onset_compensated_[ch][k]); + } + if (hold_counters_[ch][k] <= 0) { + coming_onset_[ch][k] = true; + hold_counters_[ch][k] = 0; + } + } + } + } +} + +void SubbandErleEstimator::ResetAccumulatedSpectra() { + for (size_t ch = 0; ch < erle_during_onsets_.size(); ++ch) { + accum_spectra_.Y2[ch].fill(0.f); + accum_spectra_.E2[ch].fill(0.f); + accum_spectra_.num_points[ch] = 0; + accum_spectra_.low_render_energy[ch].fill(false); + } +} + +void SubbandErleEstimator::UpdateAccumulatedSpectra( + rtc::ArrayView X2, + rtc::ArrayView> Y2, + rtc::ArrayView> E2, + const std::vector& converged_filters) { + auto& st = accum_spectra_; + RTC_DCHECK_EQ(st.E2.size(), E2.size()); + RTC_DCHECK_EQ(st.E2.size(), E2.size()); + const int num_capture_channels = static_cast(Y2.size()); + for (int ch = 0; ch < num_capture_channels; ++ch) { + // Note that the use of the converged_filter flag already imposed + // a minimum of the erle that can be estimated as that flag would + // be false if the filter is performing poorly. 
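+    // Spectra are accumulated over windows of kPointsToAccumulate blocks;
+    // a full window is consumed by UpdateBands and restarted below.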
+void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() {
+  const int num_capture_channels = static_cast<int>(accum_spectra_.Y2.size());
+  for (int ch = 0; ch < num_capture_channels; ++ch) {
+    for (size_t k = 1; k < kFftLengthBy2; ++k) {
+      --hold_counters_[ch][k];
+      if (hold_counters_[ch][k] <=
+          (kBlocksForOnsetDetection - kBlocksToHoldErle)) {
+        if (erle_onset_compensated_[ch][k] > erle_during_onsets_[ch][k]) {
+          erle_onset_compensated_[ch][k] =
+              std::max(erle_during_onsets_[ch][k],
+                       0.97f * erle_onset_compensated_[ch][k]);
+          RTC_DCHECK_LE(min_erle_, erle_onset_compensated_[ch][k]);
+        }
+        if (hold_counters_[ch][k] <= 0) {
+          coming_onset_[ch][k] = true;
+          hold_counters_[ch][k] = 0;
+        }
+      }
+    }
+  }
+}
+
+void SubbandErleEstimator::ResetAccumulatedSpectra() {
+  for (size_t ch = 0; ch < erle_during_onsets_.size(); ++ch) {
+    accum_spectra_.Y2[ch].fill(0.f);
+    accum_spectra_.E2[ch].fill(0.f);
+    accum_spectra_.num_points[ch] = 0;
+    accum_spectra_.low_render_energy[ch].fill(false);
+  }
+}
+
+void SubbandErleEstimator::UpdateAccumulatedSpectra(
+    rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
+    const std::vector<bool>& converged_filters) {
+  auto& st = accum_spectra_;
+  RTC_DCHECK_EQ(st.E2.size(), E2.size());
+  RTC_DCHECK_EQ(st.E2.size(), E2.size());
+  const int num_capture_channels = static_cast<int>(Y2.size());
+  for (int ch = 0; ch < num_capture_channels; ++ch) {
+    // Note that the use of the converged_filter flag already imposed
+    // a minimum of the erle that can be estimated as that flag would
+    // be false if the filter is performing poorly.
+    if (!converged_filters[ch]) {
+      continue;
+    }
+
+    if (st.num_points[ch] == kPointsToAccumulate) {
+      st.num_points[ch] = 0;
+      st.Y2[ch].fill(0.f);
+      st.E2[ch].fill(0.f);
+      st.low_render_energy[ch].fill(false);
+    }
+
+    std::transform(Y2[ch].begin(), Y2[ch].end(), st.Y2[ch].begin(),
+                   st.Y2[ch].begin(), std::plus<float>());
+    std::transform(E2[ch].begin(), E2[ch].end(), st.E2[ch].begin(),
+                   st.E2[ch].begin(), std::plus<float>());
+
+    for (size_t k = 0; k < X2.size(); ++k) {
+      st.low_render_energy[ch][k] =
+          st.low_render_energy[ch][k] || X2[k] < kX2BandEnergyThreshold;
+    }
+
+    ++st.num_points[ch];
+  }
+}
+
+}  // namespace webrtc
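// Illustrative sketch (not from the upstream sources): after a render onset,
// DecreaseErlePerBandForLowRenderSignals() above holds the onset-compensated
// ERLE for roughly kBlocksToHoldErle blocks and then decays it by 3% per
// block, floored at the ERLE measured during onsets. Condensed, with
// hypothetical names:
//
//   float DecayOnsetErle(float erle_compensated, float erle_during_onsets,
//                        int hold_counter) {
//     const int hold_limit = kBlocksForOnsetDetection - kBlocksToHoldErle;
//     if (hold_counter > hold_limit) {
//       return erle_compensated;  // Still within the hold period.
//     }
//     return std::max(erle_during_onsets, 0.97f * erle_compensated);
//   }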
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.h b/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.h
new file mode 100644
index 0000000000..8bf9c4d645
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/subband_erle_estimator.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_ERLE_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_ERLE_ESTIMATOR_H_
+
+#include <stddef.h>
+
+#include <array>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+
+// Estimates the echo return loss enhancement for each frequency subband.
+class SubbandErleEstimator {
+ public:
+  SubbandErleEstimator(const EchoCanceller3Config& config,
+                       size_t num_capture_channels);
+  ~SubbandErleEstimator();
+
+  // Resets the ERLE estimator.
+  void Reset();
+
+  // Updates the ERLE estimate.
+  void Update(rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
+              const std::vector<bool>& converged_filters);
+
+  // Returns the ERLE estimate.
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
+      bool onset_compensated) const {
+    return onset_compensated && use_onset_detection_ ? erle_onset_compensated_
+                                                     : erle_;
+  }
+
+  // Returns the non-capped ERLE estimate.
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
+      const {
+    return erle_unbounded_;
+  }
+
+  // Returns the ERLE estimate at onsets (only used for testing).
+  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets()
+      const {
+    return erle_during_onsets_;
+  }
+
+  void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
+
+ private:
+  struct AccumulatedSpectra {
+    explicit AccumulatedSpectra(size_t num_capture_channels)
+        : Y2(num_capture_channels),
+          E2(num_capture_channels),
+          low_render_energy(num_capture_channels),
+          num_points(num_capture_channels) {}
+    std::vector<std::array<float, kFftLengthBy2Plus1>> Y2;
+    std::vector<std::array<float, kFftLengthBy2Plus1>> E2;
+    std::vector<std::array<bool, kFftLengthBy2Plus1>> low_render_energy;
+    std::vector<int> num_points;
+  };
+
+  void UpdateAccumulatedSpectra(
+      rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
+      const std::vector<bool>& converged_filters);
+
+  void ResetAccumulatedSpectra();
+
+  void UpdateBands(const std::vector<bool>& converged_filters);
+  void DecreaseErlePerBandForLowRenderSignals();
+
+  const bool use_onset_detection_;
+  const float min_erle_;
+  const std::array<float, kFftLengthBy2Plus1> max_erle_;
+  const bool use_min_erle_during_onsets_;
+  AccumulatedSpectra accum_spectra_;
+  // ERLE without special handling of render onsets.
+  std::vector<std::array<float, kFftLengthBy2Plus1>> erle_;
+  // ERLE lowered during render onsets.
+  std::vector<std::array<float, kFftLengthBy2Plus1>> erle_onset_compensated_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> erle_unbounded_;
+  // Estimation of ERLE during render onsets.
+  std::vector<std::array<float, kFftLengthBy2Plus1>> erle_during_onsets_;
+  std::vector<std::array<bool, kFftLengthBy2Plus1>> coming_onset_;
+  std::vector<std::array<int, kFftLengthBy2Plus1>> hold_counters_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_ERLE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc
new file mode 100644
index 0000000000..2aa400c3af
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/subband_nearend_detector.h"
+
+#include <numeric>
+
+namespace webrtc {
+SubbandNearendDetector::SubbandNearendDetector(
+    const EchoCanceller3Config::Suppressor::SubbandNearendDetection& config,
+    size_t num_capture_channels)
+    : config_(config),
+      num_capture_channels_(num_capture_channels),
+      nearend_smoothers_(num_capture_channels_,
+                         aec3::MovingAverage(kFftLengthBy2Plus1,
+                                             config_.nearend_average_blocks)),
+      one_over_subband_length1_(
+          1.f / (config_.subband1.high - config_.subband1.low + 1)),
+      one_over_subband_length2_(
+          1.f / (config_.subband2.high - config_.subband2.low + 1)) {}
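// Illustrative sketch (not from the upstream sources): Update() below averages
// spectral power over two configured subbands and flags nearend dominance when
// the first subband is weak relative to the second yet clearly above the
// comfort-noise floor. Condensed standalone versions of both steps, with
// hypothetical names:
namespace {
float MeanSubbandPowerSketch(rtc::ArrayView<const float> spectrum, size_t low,
                             size_t high) {
  // Inclusive band [low, high], hence the "+ 1" in the normalization.
  return std::accumulate(spectrum.begin() + low, spectrum.begin() + high + 1,
                         0.f) /
         (high - low + 1);
}

bool NearendDecisionSketch(float subband1, float subband2, float noise,
                           float nearend_threshold, float snr_threshold) {
  return subband1 < nearend_threshold * subband2 &&
         subband1 > snr_threshold * noise;
}
}  // namespace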
+void SubbandNearendDetector::Update(
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        nearend_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        residual_echo_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        comfort_noise_spectrum,
+    bool initial_state) {
+  nearend_state_ = false;
+  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+    const std::array<float, kFftLengthBy2Plus1>& noise =
+        comfort_noise_spectrum[ch];
+    std::array<float, kFftLengthBy2Plus1> nearend;
+    nearend_smoothers_[ch].Average(nearend_spectrum[ch], nearend);
+
+    // Noise power of the first region.
+    float noise_power =
+        std::accumulate(noise.begin() + config_.subband1.low,
+                        noise.begin() + config_.subband1.high + 1, 0.f) *
+        one_over_subband_length1_;
+
+    // Nearend power of the first region.
+    float nearend_power_subband1 =
+        std::accumulate(nearend.begin() + config_.subband1.low,
+                        nearend.begin() + config_.subband1.high + 1, 0.f) *
+        one_over_subband_length1_;
+
+    // Nearend power of the second region.
+    float nearend_power_subband2 =
+        std::accumulate(nearend.begin() + config_.subband2.low,
+                        nearend.begin() + config_.subband2.high + 1, 0.f) *
+        one_over_subband_length2_;
+
+    // One channel is sufficient to trigger nearend state.
+    nearend_state_ =
+        nearend_state_ ||
+        (nearend_power_subband1 <
+             config_.nearend_threshold * nearend_power_subband2 &&
+         (nearend_power_subband1 > config_.snr_threshold * noise_power));
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.h b/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.h
new file mode 100644
index 0000000000..8357edb65f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/subband_nearend_detector.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_NEAREND_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_NEAREND_DETECTOR_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/moving_average.h"
+#include "modules/audio_processing/aec3/nearend_detector.h"
+
+namespace webrtc {
+// Class for selecting whether the suppressor is in the nearend or echo state.
+class SubbandNearendDetector : public NearendDetector {
+ public:
+  SubbandNearendDetector(
+      const EchoCanceller3Config::Suppressor::SubbandNearendDetection& config,
+      size_t num_capture_channels);
+
+  // Returns whether the current state is the nearend state.
+  bool IsNearendState() const override { return nearend_state_; }
+
+  // Updates the state selection based on latest spectral estimates.
+  void Update(rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+                  nearend_spectrum,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+                  residual_echo_spectrum,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+                  comfort_noise_spectrum,
+              bool initial_state) override;
+
+ private:
+  const EchoCanceller3Config::Suppressor::SubbandNearendDetection config_;
+  const size_t num_capture_channels_;
+  std::vector<aec3::MovingAverage> nearend_smoothers_;
+  const float one_over_subband_length1_;
+  const float one_over_subband_length2_;
+  bool nearend_state_ = false;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_NEAREND_DETECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc
new file mode 100644
index 0000000000..aa36bb272a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.cc
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/subtractor.h" + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +bool UseCoarseFilterResetHangover() { + return !field_trial::IsEnabled( + "WebRTC-Aec3CoarseFilterResetHangoverKillSwitch"); +} + +void PredictionError(const Aec3Fft& fft, + const FftData& S, + rtc::ArrayView y, + std::array* e, + std::array* s) { + std::array tmp; + fft.Ifft(S, &tmp); + constexpr float kScale = 1.0f / kFftLengthBy2; + std::transform(y.begin(), y.end(), tmp.begin() + kFftLengthBy2, e->begin(), + [&](float a, float b) { return a - b * kScale; }); + + if (s) { + for (size_t k = 0; k < s->size(); ++k) { + (*s)[k] = kScale * tmp[k + kFftLengthBy2]; + } + } +} + +void ScaleFilterOutput(rtc::ArrayView y, + float factor, + rtc::ArrayView e, + rtc::ArrayView s) { + RTC_DCHECK_EQ(y.size(), e.size()); + RTC_DCHECK_EQ(y.size(), s.size()); + for (size_t k = 0; k < y.size(); ++k) { + s[k] *= factor; + e[k] = y[k] - s[k]; + } +} + +} // namespace + +Subtractor::Subtractor(const EchoCanceller3Config& config, + size_t num_render_channels, + size_t num_capture_channels, + ApmDataDumper* data_dumper, + Aec3Optimization optimization) + : fft_(), + data_dumper_(data_dumper), + optimization_(optimization), + config_(config), + num_capture_channels_(num_capture_channels), + use_coarse_filter_reset_hangover_(UseCoarseFilterResetHangover()), + refined_filters_(num_capture_channels_), + coarse_filter_(num_capture_channels_), + refined_gains_(num_capture_channels_), + coarse_gains_(num_capture_channels_), + filter_misadjustment_estimators_(num_capture_channels_), + poor_coarse_filter_counters_(num_capture_channels_, 0), + coarse_filter_reset_hangover_(num_capture_channels_, 0), + refined_frequency_responses_( + num_capture_channels_, + std::vector>( + std::max(config_.filter.refined_initial.length_blocks, + config_.filter.refined.length_blocks), + std::array())), + refined_impulse_responses_( + num_capture_channels_, + std::vector(GetTimeDomainLength(std::max( + config_.filter.refined_initial.length_blocks, + config_.filter.refined.length_blocks)), + 0.f)), + coarse_impulse_responses_(0) { + // Set up the storing of coarse impulse responses if data dumping is + // available. 
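// Illustrative note (not from the upstream sources): PredictionError() above
// forms the time-domain error as e[k] = y[k] - s[k], where the filter output
// s is read from the second half of the 128-point inverse FFT of S and scaled
// by 1 / kFftLengthBy2:
//   e[k] = y[k] - (1.0f / kFftLengthBy2) * ifft_of_S[k + kFftLengthBy2];
// for k in [0, kBlockSize), with ifft_of_S a hypothetical name for the IFFT
// output buffer.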
+ if (ApmDataDumper::IsAvailable()) { + coarse_impulse_responses_.resize(num_capture_channels_); + const size_t filter_size = GetTimeDomainLength( + std::max(config_.filter.coarse_initial.length_blocks, + config_.filter.coarse.length_blocks)); + for (std::vector& impulse_response : coarse_impulse_responses_) { + impulse_response.resize(filter_size, 0.f); + } + } + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + refined_filters_[ch] = std::make_unique( + config_.filter.refined.length_blocks, + config_.filter.refined_initial.length_blocks, + config.filter.config_change_duration_blocks, num_render_channels, + optimization, data_dumper_); + + coarse_filter_[ch] = std::make_unique( + config_.filter.coarse.length_blocks, + config_.filter.coarse_initial.length_blocks, + config.filter.config_change_duration_blocks, num_render_channels, + optimization, data_dumper_); + refined_gains_[ch] = std::make_unique( + config_.filter.refined_initial, + config_.filter.config_change_duration_blocks); + coarse_gains_[ch] = std::make_unique( + config_.filter.coarse_initial, + config.filter.config_change_duration_blocks); + } + + RTC_DCHECK(data_dumper_); + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + for (auto& H2_k : refined_frequency_responses_[ch]) { + H2_k.fill(0.f); + } + } +} + +Subtractor::~Subtractor() = default; + +void Subtractor::HandleEchoPathChange( + const EchoPathVariability& echo_path_variability) { + const auto full_reset = [&]() { + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + refined_filters_[ch]->HandleEchoPathChange(); + coarse_filter_[ch]->HandleEchoPathChange(); + refined_gains_[ch]->HandleEchoPathChange(echo_path_variability); + coarse_gains_[ch]->HandleEchoPathChange(); + refined_gains_[ch]->SetConfig(config_.filter.refined_initial, true); + coarse_gains_[ch]->SetConfig(config_.filter.coarse_initial, true); + refined_filters_[ch]->SetSizePartitions( + config_.filter.refined_initial.length_blocks, true); + coarse_filter_[ch]->SetSizePartitions( + config_.filter.coarse_initial.length_blocks, true); + } + }; + + if (echo_path_variability.delay_change != + EchoPathVariability::DelayAdjustment::kNone) { + full_reset(); + } + + if (echo_path_variability.gain_change) { + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + refined_gains_[ch]->HandleEchoPathChange(echo_path_variability); + } + } +} + +void Subtractor::ExitInitialState() { + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + refined_gains_[ch]->SetConfig(config_.filter.refined, false); + coarse_gains_[ch]->SetConfig(config_.filter.coarse, false); + refined_filters_[ch]->SetSizePartitions( + config_.filter.refined.length_blocks, false); + coarse_filter_[ch]->SetSizePartitions(config_.filter.coarse.length_blocks, + false); + } +} + +void Subtractor::Process(const RenderBuffer& render_buffer, + const Block& capture, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + rtc::ArrayView outputs) { + RTC_DCHECK_EQ(num_capture_channels_, capture.NumChannels()); + + // Compute the render powers. + const bool same_filter_sizes = refined_filters_[0]->SizePartitions() == + coarse_filter_[0]->SizePartitions(); + std::array X2_refined; + std::array X2_coarse_data; + auto& X2_coarse = same_filter_sizes ? 
X2_refined : X2_coarse_data; + if (same_filter_sizes) { + render_buffer.SpectralSum(refined_filters_[0]->SizePartitions(), + &X2_refined); + } else if (refined_filters_[0]->SizePartitions() > + coarse_filter_[0]->SizePartitions()) { + render_buffer.SpectralSums(coarse_filter_[0]->SizePartitions(), + refined_filters_[0]->SizePartitions(), + &X2_coarse, &X2_refined); + } else { + render_buffer.SpectralSums(refined_filters_[0]->SizePartitions(), + coarse_filter_[0]->SizePartitions(), &X2_refined, + &X2_coarse); + } + + // Process all capture channels + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + SubtractorOutput& output = outputs[ch]; + rtc::ArrayView y = capture.View(/*band=*/0, ch); + FftData& E_refined = output.E_refined; + FftData E_coarse; + std::array& e_refined = output.e_refined; + std::array& e_coarse = output.e_coarse; + + FftData S; + FftData& G = S; + + // Form the outputs of the refined and coarse filters. + refined_filters_[ch]->Filter(render_buffer, &S); + PredictionError(fft_, S, y, &e_refined, &output.s_refined); + + coarse_filter_[ch]->Filter(render_buffer, &S); + PredictionError(fft_, S, y, &e_coarse, &output.s_coarse); + + // Compute the signal powers in the subtractor output. + output.ComputeMetrics(y); + + // Adjust the filter if needed. + bool refined_filters_adjusted = false; + filter_misadjustment_estimators_[ch].Update(output); + if (filter_misadjustment_estimators_[ch].IsAdjustmentNeeded()) { + float scale = filter_misadjustment_estimators_[ch].GetMisadjustment(); + refined_filters_[ch]->ScaleFilter(scale); + for (auto& h_k : refined_impulse_responses_[ch]) { + h_k *= scale; + } + ScaleFilterOutput(y, scale, e_refined, output.s_refined); + filter_misadjustment_estimators_[ch].Reset(); + refined_filters_adjusted = true; + } + + // Compute the FFts of the refined and coarse filter outputs. + fft_.ZeroPaddedFft(e_refined, Aec3Fft::Window::kHanning, &E_refined); + fft_.ZeroPaddedFft(e_coarse, Aec3Fft::Window::kHanning, &E_coarse); + + // Compute spectra for future use. + E_coarse.Spectrum(optimization_, output.E2_coarse); + E_refined.Spectrum(optimization_, output.E2_refined); + + // Update the refined filter. + if (!refined_filters_adjusted) { + // Do not allow the performance of the coarse filter to affect the + // adaptation speed of the refined filter just after the coarse filter has + // been reset. + const bool disallow_leakage_diverged = + coarse_filter_reset_hangover_[ch] > 0 && + use_coarse_filter_reset_hangover_; + + std::array erl; + ComputeErl(optimization_, refined_frequency_responses_[ch], erl); + refined_gains_[ch]->Compute(X2_refined, render_signal_analyzer, output, + erl, refined_filters_[ch]->SizePartitions(), + aec_state.SaturatedCapture(), + disallow_leakage_diverged, &G); + } else { + G.re.fill(0.f); + G.im.fill(0.f); + } + refined_filters_[ch]->Adapt(render_buffer, G, + &refined_impulse_responses_[ch]); + refined_filters_[ch]->ComputeFrequencyResponse( + &refined_frequency_responses_[ch]); + + if (ch == 0) { + data_dumper_->DumpRaw("aec3_subtractor_G_refined", G.re); + data_dumper_->DumpRaw("aec3_subtractor_G_refined", G.im); + } + + // Update the coarse filter. + poor_coarse_filter_counters_[ch] = + output.e2_refined < output.e2_coarse + ? 
poor_coarse_filter_counters_[ch] + 1 + : 0; + if (poor_coarse_filter_counters_[ch] < 5) { + coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_coarse, + coarse_filter_[ch]->SizePartitions(), + aec_state.SaturatedCapture(), &G); + coarse_filter_reset_hangover_[ch] = + std::max(coarse_filter_reset_hangover_[ch] - 1, 0); + } else { + poor_coarse_filter_counters_[ch] = 0; + coarse_filter_[ch]->SetFilter(refined_filters_[ch]->SizePartitions(), + refined_filters_[ch]->GetFilter()); + coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_refined, + coarse_filter_[ch]->SizePartitions(), + aec_state.SaturatedCapture(), &G); + coarse_filter_reset_hangover_[ch] = + config_.filter.coarse_reset_hangover_blocks; + } + + if (ApmDataDumper::IsAvailable()) { + RTC_DCHECK_LT(ch, coarse_impulse_responses_.size()); + coarse_filter_[ch]->Adapt(render_buffer, G, + &coarse_impulse_responses_[ch]); + } else { + coarse_filter_[ch]->Adapt(render_buffer, G); + } + + if (ch == 0) { + data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.re); + data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.im); + filter_misadjustment_estimators_[ch].Dump(data_dumper_); + DumpFilters(); + } + + std::for_each(e_refined.begin(), e_refined.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + + if (ch == 0) { + data_dumper_->DumpWav("aec3_refined_filters_output", kBlockSize, + &e_refined[0], 16000, 1); + data_dumper_->DumpWav("aec3_coarse_filter_output", kBlockSize, + &e_coarse[0], 16000, 1); + } + } +} + +void Subtractor::FilterMisadjustmentEstimator::Update( + const SubtractorOutput& output) { + e2_acum_ += output.e2_refined; + y2_acum_ += output.y2; + if (++n_blocks_acum_ == n_blocks_) { + if (y2_acum_ > n_blocks_ * 200.f * 200.f * kBlockSize) { + float update = (e2_acum_ / y2_acum_); + if (e2_acum_ > n_blocks_ * 7500.f * 7500.f * kBlockSize) { + // Duration equal to blockSizeMs * n_blocks_ * 4. + overhang_ = 4; + } else { + overhang_ = std::max(overhang_ - 1, 0); + } + + if ((update < inv_misadjustment_) || (overhang_ > 0)) { + inv_misadjustment_ += 0.1f * (update - inv_misadjustment_); + } + } + e2_acum_ = 0.f; + y2_acum_ = 0.f; + n_blocks_acum_ = 0; + } +} + +void Subtractor::FilterMisadjustmentEstimator::Reset() { + e2_acum_ = 0.f; + y2_acum_ = 0.f; + n_blocks_acum_ = 0; + inv_misadjustment_ = 0.f; + overhang_ = 0.f; +} + +void Subtractor::FilterMisadjustmentEstimator::Dump( + ApmDataDumper* data_dumper) const { + data_dumper->DumpRaw("aec3_inv_misadjustment_factor", inv_misadjustment_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.h b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.h new file mode 100644 index 0000000000..86159a3442 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_H_ + +#include +#include + +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/block.h" +#include "modules/audio_processing/aec3/coarse_filter_update_gain.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/refined_filter_update_gain.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" +#include "modules/audio_processing/aec3/subtractor_output.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +// Proves linear echo cancellation functionality +class Subtractor { + public: + Subtractor(const EchoCanceller3Config& config, + size_t num_render_channels, + size_t num_capture_channels, + ApmDataDumper* data_dumper, + Aec3Optimization optimization); + ~Subtractor(); + Subtractor(const Subtractor&) = delete; + Subtractor& operator=(const Subtractor&) = delete; + + // Performs the echo subtraction. + void Process(const RenderBuffer& render_buffer, + const Block& capture, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + rtc::ArrayView outputs); + + void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); + + // Exits the initial state. + void ExitInitialState(); + + // Returns the block-wise frequency responses for the refined adaptive + // filters. + const std::vector>>& + FilterFrequencyResponses() const { + return refined_frequency_responses_; + } + + // Returns the estimates of the impulse responses for the refined adaptive + // filters. + const std::vector>& FilterImpulseResponses() const { + return refined_impulse_responses_; + } + + void DumpFilters() { + data_dumper_->DumpRaw( + "aec3_subtractor_h_refined", + rtc::ArrayView( + refined_impulse_responses_[0].data(), + GetTimeDomainLength( + refined_filters_[0]->max_filter_size_partitions()))); + if (ApmDataDumper::IsAvailable()) { + RTC_DCHECK_GT(coarse_impulse_responses_.size(), 0); + data_dumper_->DumpRaw( + "aec3_subtractor_h_coarse", + rtc::ArrayView( + coarse_impulse_responses_[0].data(), + GetTimeDomainLength( + coarse_filter_[0]->max_filter_size_partitions()))); + } + + refined_filters_[0]->DumpFilter("aec3_subtractor_H_refined"); + coarse_filter_[0]->DumpFilter("aec3_subtractor_H_coarse"); + } + + private: + class FilterMisadjustmentEstimator { + public: + FilterMisadjustmentEstimator() = default; + ~FilterMisadjustmentEstimator() = default; + // Update the misadjustment estimator. + void Update(const SubtractorOutput& output); + // GetMisadjustment() Returns a recommended scale for the filter so the + // prediction error energy gets closer to the energy that is seen at the + // microphone input. + float GetMisadjustment() const { + RTC_DCHECK_GT(inv_misadjustment_, 0.0f); + // It is not aiming to adjust all the estimated mismatch. Instead, + // it adjusts half of that estimated mismatch. 
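// Worked example (illustrative, not from the upstream sources): if the
// smoothed ratio is inv_misadjustment_ = e2 / y2 = 16, i.e. the prediction
// error carries 16 times the microphone energy, matching the energies exactly
// would suggest scaling the filter by 1 / sqrt(16) = 0.25. The expression
// below instead returns 2 / sqrt(16) = 0.5, applying only half of the
// indicated correction (a factor-of-two backoff in amplitude).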
+ return 2.f / sqrtf(inv_misadjustment_); + } + // Returns true if the prediciton error energy is significantly larger + // than the microphone signal energy and, therefore, an adjustment is + // recommended. + bool IsAdjustmentNeeded() const { return inv_misadjustment_ > 10.f; } + void Reset(); + void Dump(ApmDataDumper* data_dumper) const; + + private: + const int n_blocks_ = 4; + int n_blocks_acum_ = 0; + float e2_acum_ = 0.f; + float y2_acum_ = 0.f; + float inv_misadjustment_ = 0.f; + int overhang_ = 0.f; + }; + + const Aec3Fft fft_; + ApmDataDumper* data_dumper_; + const Aec3Optimization optimization_; + const EchoCanceller3Config config_; + const size_t num_capture_channels_; + const bool use_coarse_filter_reset_hangover_; + + std::vector> refined_filters_; + std::vector> coarse_filter_; + std::vector> refined_gains_; + std::vector> coarse_gains_; + std::vector filter_misadjustment_estimators_; + std::vector poor_coarse_filter_counters_; + std::vector coarse_filter_reset_hangover_; + std::vector>> + refined_frequency_responses_; + std::vector> refined_impulse_responses_; + std::vector> coarse_impulse_responses_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc new file mode 100644 index 0000000000..ed80101f06 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/subtractor_output.h" + +#include + +namespace webrtc { + +SubtractorOutput::SubtractorOutput() = default; +SubtractorOutput::~SubtractorOutput() = default; + +void SubtractorOutput::Reset() { + s_refined.fill(0.f); + s_coarse.fill(0.f); + e_refined.fill(0.f); + e_coarse.fill(0.f); + E_refined.re.fill(0.f); + E_refined.im.fill(0.f); + E2_refined.fill(0.f); + E2_coarse.fill(0.f); + e2_refined = 0.f; + e2_coarse = 0.f; + s2_refined = 0.f; + s2_coarse = 0.f; + y2 = 0.f; +} + +void SubtractorOutput::ComputeMetrics(rtc::ArrayView y) { + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares); + e2_refined = + std::accumulate(e_refined.begin(), e_refined.end(), 0.f, sum_of_squares); + e2_coarse = + std::accumulate(e_coarse.begin(), e_coarse.end(), 0.f, sum_of_squares); + s2_refined = + std::accumulate(s_refined.begin(), s_refined.end(), 0.f, sum_of_squares); + s2_coarse = + std::accumulate(s_coarse.begin(), s_coarse.end(), 0.f, sum_of_squares); + + s_refined_max_abs = *std::max_element(s_refined.begin(), s_refined.end()); + s_refined_max_abs = + std::max(s_refined_max_abs, + -(*std::min_element(s_refined.begin(), s_refined.end()))); + + s_coarse_max_abs = *std::max_element(s_coarse.begin(), s_coarse.end()); + s_coarse_max_abs = std::max( + s_coarse_max_abs, -(*std::min_element(s_coarse.begin(), s_coarse.end()))); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.h b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.h new file mode 100644 index 0000000000..d2d12082c6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fft_data.h" + +namespace webrtc { + +// Stores the values being returned from the echo subtractor for a single +// capture channel. +struct SubtractorOutput { + SubtractorOutput(); + ~SubtractorOutput(); + + std::array s_refined; + std::array s_coarse; + std::array e_refined; + std::array e_coarse; + FftData E_refined; + std::array E2_refined; + std::array E2_coarse; + float s2_refined = 0.f; + float s2_coarse = 0.f; + float e2_refined = 0.f; + float e2_coarse = 0.f; + float y2 = 0.f; + float s_refined_max_abs = 0.f; + float s_coarse_max_abs = 0.f; + + // Reset the struct content. + void Reset(); + + // Updates the powers of the signals. 
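// Illustrative note (not from the upstream sources): ComputeMetrics() reduces
// each block to scalar energies with a sum-of-squares fold, e.g.
//   y2 = std::accumulate(y.begin(), y.end(), 0.f,
//                        [](float a, float b) { return a + b * b; });
// and also records the peak absolute filter output, which the caller uses for
// saturation detection.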
+ void ComputeMetrics(rtc::ArrayView y); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc new file mode 100644 index 0000000000..baf0600161 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/subtractor_output_analyzer.h" + +#include + +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +SubtractorOutputAnalyzer::SubtractorOutputAnalyzer(size_t num_capture_channels) + : filters_converged_(num_capture_channels, false) {} + +void SubtractorOutputAnalyzer::Update( + rtc::ArrayView subtractor_output, + bool* any_filter_converged, + bool* any_coarse_filter_converged, + bool* all_filters_diverged) { + RTC_DCHECK(any_filter_converged); + RTC_DCHECK(all_filters_diverged); + RTC_DCHECK_EQ(subtractor_output.size(), filters_converged_.size()); + + *any_filter_converged = false; + *any_coarse_filter_converged = false; + *all_filters_diverged = true; + + for (size_t ch = 0; ch < subtractor_output.size(); ++ch) { + const float y2 = subtractor_output[ch].y2; + const float e2_refined = subtractor_output[ch].e2_refined; + const float e2_coarse = subtractor_output[ch].e2_coarse; + + constexpr float kConvergenceThreshold = 50 * 50 * kBlockSize; + constexpr float kConvergenceThresholdLowLevel = 20 * 20 * kBlockSize; + bool refined_filter_converged = + e2_refined < 0.5f * y2 && y2 > kConvergenceThreshold; + bool coarse_filter_converged_strict = + e2_coarse < 0.05f * y2 && y2 > kConvergenceThreshold; + bool coarse_filter_converged_relaxed = + e2_coarse < 0.2f * y2 && y2 > kConvergenceThresholdLowLevel; + float min_e2 = std::min(e2_refined, e2_coarse); + bool filter_diverged = min_e2 > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize; + filters_converged_[ch] = + refined_filter_converged || coarse_filter_converged_strict; + + *any_filter_converged = *any_filter_converged || filters_converged_[ch]; + *any_coarse_filter_converged = + *any_coarse_filter_converged || coarse_filter_converged_relaxed; + *all_filters_diverged = *all_filters_diverged && filter_diverged; + } +} + +void SubtractorOutputAnalyzer::HandleEchoPathChange() { + std::fill(filters_converged_.begin(), filters_converged_.end(), false); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.h b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.h new file mode 100644 index 0000000000..32707dbb19 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_output_analyzer.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_ANALYZER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_ANALYZER_H_ + +#include + +#include "modules/audio_processing/aec3/subtractor_output.h" + +namespace webrtc { + +// Class for analyzing the properties subtractor output. +class SubtractorOutputAnalyzer { + public: + explicit SubtractorOutputAnalyzer(size_t num_capture_channels); + ~SubtractorOutputAnalyzer() = default; + + // Analyses the subtractor output. + void Update(rtc::ArrayView subtractor_output, + bool* any_filter_converged, + bool* any_coarse_filter_converged, + bool* all_filters_diverged); + + const std::vector& ConvergedFilters() const { + return filters_converged_; + } + + // Handle echo path change. + void HandleEchoPathChange(); + + private: + std::vector filters_converged_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_ANALYZER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_unittest.cc new file mode 100644 index 0000000000..56b9cec9f1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/subtractor_unittest.cc @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/subtractor.h" + +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "modules/audio_processing/utility/cascaded_biquad_filter.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +std::vector RunSubtractorTest( + size_t num_render_channels, + size_t num_capture_channels, + int num_blocks_to_process, + int delay_samples, + int refined_filter_length_blocks, + int coarse_filter_length_blocks, + bool uncorrelated_inputs, + const std::vector& blocks_with_echo_path_changes) { + ApmDataDumper data_dumper(42); + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + EchoCanceller3Config config; + config.filter.refined.length_blocks = refined_filter_length_blocks; + config.filter.coarse.length_blocks = coarse_filter_length_blocks; + + Subtractor subtractor(config, num_render_channels, num_capture_channels, + &data_dumper, DetectOptimization()); + absl::optional delay_estimate; + Block x(kNumBands, num_render_channels); + Block y(/*num_bands=*/1, num_capture_channels); + std::array x_old; + std::vector output(num_capture_channels); + config.delay.default_delay = 1; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels)); + RenderSignalAnalyzer render_signal_analyzer(config); + Random random_generator(42U); + Aec3Fft fft; + std::vector> Y2(num_capture_channels); + std::vector> E2_refined( + num_capture_channels); + std::array E2_coarse; + AecState aec_state(config, num_capture_channels); + x_old.fill(0.f); + for (auto& Y2_ch : Y2) { + Y2_ch.fill(0.f); + } + for (auto& E2_refined_ch : E2_refined) { + E2_refined_ch.fill(0.f); + } + E2_coarse.fill(0.f); + + std::vector>>> delay_buffer( + num_capture_channels); + for (size_t capture_ch = 0; capture_ch < num_capture_channels; ++capture_ch) { + delay_buffer[capture_ch].resize(num_render_channels); + for (size_t render_ch = 0; render_ch < num_render_channels; ++render_ch) { + delay_buffer[capture_ch][render_ch] = + std::make_unique>(delay_samples); + } + } + + // [B,A] = butter(2,100/8000,'high') + constexpr CascadedBiQuadFilter::BiQuadCoefficients + kHighPassFilterCoefficients = {{0.97261f, -1.94523f, 0.97261f}, + {-1.94448f, 0.94598f}}; + std::vector> x_hp_filter( + num_render_channels); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + x_hp_filter[ch] = + std::make_unique(kHighPassFilterCoefficients, 1); + } + std::vector> y_hp_filter( + num_capture_channels); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + y_hp_filter[ch] = + std::make_unique(kHighPassFilterCoefficients, 1); + } + + for (int k = 0; k < num_blocks_to_process; ++k) { + for (size_t render_ch = 0; render_ch < num_render_channels; ++render_ch) { + RandomizeSampleVector(&random_generator, x.View(/*band=*/0, render_ch)); + } + if (uncorrelated_inputs) { + for (size_t capture_ch = 0; capture_ch < num_capture_channels; + ++capture_ch) { + RandomizeSampleVector(&random_generator, + y.View(/*band=*/0, capture_ch)); + } + } else { + for (size_t capture_ch = 0; capture_ch < num_capture_channels; + ++capture_ch) { + rtc::ArrayView y_view = y.View(/*band=*/0, capture_ch); + for (size_t render_ch = 0; render_ch < num_render_channels; + ++render_ch) { + std::array 
y_channel; + delay_buffer[capture_ch][render_ch]->Delay( + x.View(/*band=*/0, render_ch), y_channel); + for (size_t k = 0; k < kBlockSize; ++k) { + y_view[k] += y_channel[k] / num_render_channels; + } + } + } + } + for (size_t ch = 0; ch < num_render_channels; ++ch) { + x_hp_filter[ch]->Process(x.View(/*band=*/0, ch)); + } + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + y_hp_filter[ch]->Process(y.View(/*band=*/0, ch)); + } + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + render_signal_analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + aec_state.MinDirectPathFilterDelay()); + + // Handle echo path changes. + if (std::find(blocks_with_echo_path_changes.begin(), + blocks_with_echo_path_changes.end(), + k) != blocks_with_echo_path_changes.end()) { + subtractor.HandleEchoPathChange(EchoPathVariability( + true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, + false)); + } + subtractor.Process(*render_delay_buffer->GetRenderBuffer(), y, + render_signal_analyzer, aec_state, output); + + aec_state.HandleEchoPathChange(EchoPathVariability( + false, EchoPathVariability::DelayAdjustment::kNone, false)); + aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(), + subtractor.FilterImpulseResponses(), + *render_delay_buffer->GetRenderBuffer(), E2_refined, Y2, + output); + } + + std::vector results(num_capture_channels); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + const float output_power = std::inner_product( + output[ch].e_refined.begin(), output[ch].e_refined.end(), + output[ch].e_refined.begin(), 0.f); + const float y_power = + std::inner_product(y.begin(/*band=*/0, ch), y.end(/*band=*/0, ch), + y.begin(/*band=*/0, ch), 0.f); + if (y_power == 0.f) { + ADD_FAILURE(); + results[ch] = -1.f; + } + results[ch] = output_power / y_power; + } + return results; +} + +std::string ProduceDebugText(size_t num_render_channels, + size_t num_capture_channels, + size_t delay, + int filter_length_blocks) { + rtc::StringBuilder ss; + ss << "delay: " << delay << ", "; + ss << "filter_length_blocks:" << filter_length_blocks << ", "; + ss << "num_render_channels:" << num_render_channels << ", "; + ss << "num_capture_channels:" << num_capture_channels; + return ss.Release(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non data dumper works. +TEST(SubtractorDeathTest, NullDataDumper) { + EXPECT_DEATH( + Subtractor(EchoCanceller3Config(), 1, 1, nullptr, DetectOptimization()), + ""); +} + +#endif + +// Verifies that the subtractor is able to converge on correlated data. +TEST(Subtractor, Convergence) { + std::vector blocks_with_echo_path_changes; + for (size_t filter_length_blocks : {12, 20, 30}) { + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(1, 1, delay_samples, filter_length_blocks)); + std::vector echo_to_nearend_powers = RunSubtractorTest( + 1, 1, 2500, delay_samples, filter_length_blocks, filter_length_blocks, + false, blocks_with_echo_path_changes); + + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_GT(0.1f, echo_to_nearend_power); + } + } + } +} + +// Verifies that the subtractor is able to handle the case when the refined +// filter is longer than the coarse filter. 
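// Illustrative note (not from the upstream sources): RunSubtractorTest() above
// synthesizes the echo as the average of the delayed render channels (each
// contribution is divided by num_render_channels), keeping the capture level
// comparable across the multi-channel parameterizations, and reports
// sum(e_refined^2) / sum(y^2) per capture channel.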
+TEST(Subtractor, RefinedFilterLongerThanCoarseFilter) { + std::vector blocks_with_echo_path_changes; + std::vector echo_to_nearend_powers = RunSubtractorTest( + 1, 1, 400, 64, 20, 15, false, blocks_with_echo_path_changes); + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_GT(0.5f, echo_to_nearend_power); + } +} + +// Verifies that the subtractor is able to handle the case when the coarse +// filter is longer than the refined filter. +TEST(Subtractor, CoarseFilterLongerThanRefinedFilter) { + std::vector blocks_with_echo_path_changes; + std::vector echo_to_nearend_powers = RunSubtractorTest( + 1, 1, 400, 64, 15, 20, false, blocks_with_echo_path_changes); + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_GT(0.5f, echo_to_nearend_power); + } +} + +// Verifies that the subtractor does not converge on uncorrelated signals. +TEST(Subtractor, NonConvergenceOnUncorrelatedSignals) { + std::vector blocks_with_echo_path_changes; + for (size_t filter_length_blocks : {12, 20, 30}) { + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(1, 1, delay_samples, filter_length_blocks)); + + std::vector echo_to_nearend_powers = RunSubtractorTest( + 1, 1, 3000, delay_samples, filter_length_blocks, filter_length_blocks, + true, blocks_with_echo_path_changes); + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_NEAR(1.f, echo_to_nearend_power, 0.1); + } + } + } +} + +class SubtractorMultiChannelUpToEightRender + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +#if defined(NDEBUG) +INSTANTIATE_TEST_SUITE_P(NonDebugMultiChannel, + SubtractorMultiChannelUpToEightRender, + ::testing::Combine(::testing::Values(1, 2, 8), + ::testing::Values(1, 2, 4))); +#else +INSTANTIATE_TEST_SUITE_P(DebugMultiChannel, + SubtractorMultiChannelUpToEightRender, + ::testing::Combine(::testing::Values(1, 2), + ::testing::Values(1, 2))); +#endif + +// Verifies that the subtractor is able to converge on correlated data. +TEST_P(SubtractorMultiChannelUpToEightRender, Convergence) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + + std::vector blocks_with_echo_path_changes; + size_t num_blocks_to_process = 2500 * num_render_channels; + std::vector echo_to_nearend_powers = RunSubtractorTest( + num_render_channels, num_capture_channels, num_blocks_to_process, 64, 20, + 20, false, blocks_with_echo_path_changes); + + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_GT(0.1f, echo_to_nearend_power); + } +} + +class SubtractorMultiChannelUpToFourRender + : public ::testing::Test, + public ::testing::WithParamInterface> {}; + +#if defined(NDEBUG) +INSTANTIATE_TEST_SUITE_P(NonDebugMultiChannel, + SubtractorMultiChannelUpToFourRender, + ::testing::Combine(::testing::Values(1, 2, 4), + ::testing::Values(1, 2, 4))); +#else +INSTANTIATE_TEST_SUITE_P(DebugMultiChannel, + SubtractorMultiChannelUpToFourRender, + ::testing::Combine(::testing::Values(1, 2), + ::testing::Values(1, 2))); +#endif + +// Verifies that the subtractor does not converge on uncorrelated signals. 
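// Illustrative note (not from the upstream sources): with uncorrelated render
// and capture the filters cannot predict the capture signal, so e stays close
// to y and the measured ratio sum(e_refined^2) / sum(y^2) remains near one;
// the test below asserts exactly that instead of expecting a small residual.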
+TEST_P(SubtractorMultiChannelUpToFourRender, + NonConvergenceOnUncorrelatedSignals) { + const size_t num_render_channels = std::get<0>(GetParam()); + const size_t num_capture_channels = std::get<1>(GetParam()); + + std::vector blocks_with_echo_path_changes; + size_t num_blocks_to_process = 5000 * num_render_channels; + std::vector echo_to_nearend_powers = RunSubtractorTest( + num_render_channels, num_capture_channels, num_blocks_to_process, 64, 20, + 20, true, blocks_with_echo_path_changes); + for (float echo_to_nearend_power : echo_to_nearend_powers) { + EXPECT_LT(.8f, echo_to_nearend_power); + EXPECT_NEAR(1.f, echo_to_nearend_power, 0.25f); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc new file mode 100644 index 0000000000..83ded425d5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.cc @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/suppression_filter.h" + +#include +#include +#include +#include +#include + +#include "modules/audio_processing/aec3/vector_math.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +// Hanning window from Matlab command win = sqrt(hanning(128)). +const float kSqrtHanning[kFftLength] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f, + 0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f, + 0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f, + 0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f, + 0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f, + 0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f, + 0.83146961230255f, 
0.81758481315158f, 0.80320753148064f, 0.78834642762661f, + 0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f, + 0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 0.65317284295378f, + 0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f, + 0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f, + 0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f, + 0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f, + 0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 0.21910124015687f, + 0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f, + 0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f}; + +} // namespace + +SuppressionFilter::SuppressionFilter(Aec3Optimization optimization, + int sample_rate_hz, + size_t num_capture_channels) + : optimization_(optimization), + sample_rate_hz_(sample_rate_hz), + num_capture_channels_(num_capture_channels), + fft_(), + e_output_old_(NumBandsForRate(sample_rate_hz_), + std::vector>( + num_capture_channels_)) { + RTC_DCHECK(ValidFullBandRate(sample_rate_hz_)); + for (size_t b = 0; b < e_output_old_.size(); ++b) { + for (size_t ch = 0; ch < e_output_old_[b].size(); ++ch) { + e_output_old_[b][ch].fill(0.f); + } + } +} + +SuppressionFilter::~SuppressionFilter() = default; + +void SuppressionFilter::ApplyGain( + rtc::ArrayView comfort_noise, + rtc::ArrayView comfort_noise_high_band, + const std::array& suppression_gain, + float high_bands_gain, + rtc::ArrayView E_lowest_band, + Block* e) { + RTC_DCHECK(e); + RTC_DCHECK_EQ(e->NumBands(), NumBandsForRate(sample_rate_hz_)); + + // Comfort noise gain is sqrt(1-g^2), where g is the suppression gain. + std::array noise_gain; + for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) { + noise_gain[i] = 1.f - suppression_gain[i] * suppression_gain[i]; + } + aec3::VectorMath(optimization_).Sqrt(noise_gain); + + const float high_bands_noise_scaling = + 0.4f * std::sqrt(1.f - high_bands_gain * high_bands_gain); + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + FftData E; + + // Analysis filterbank. + E.Assign(E_lowest_band[ch]); + + for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) { + // Apply suppression gains. + float E_real = E.re[i] * suppression_gain[i]; + float E_imag = E.im[i] * suppression_gain[i]; + + // Scale and add the comfort noise. + E.re[i] = E_real + noise_gain[i] * comfort_noise[ch].re[i]; + E.im[i] = E_imag + noise_gain[i] * comfort_noise[ch].im[i]; + } + + // Synthesis filterbank. + std::array e_extended; + constexpr float kIfftNormalization = 2.f / kFftLength; + fft_.Ifft(E, &e_extended); + + auto e0 = e->View(/*band=*/0, ch); + float* e0_old = e_output_old_[0][ch].data(); + + // Window and add the first half of e_extended with the second half of + // e_extended from the previous block. + for (size_t i = 0; i < kFftLengthBy2; ++i) { + float e0_i = e0_old[i] * kSqrtHanning[kFftLengthBy2 + i]; + e0_i += e_extended[i] * kSqrtHanning[i]; + e0[i] = e0_i * kIfftNormalization; + } + + // The second half of e_extended is stored for the succeeding frame. + std::copy(e_extended.begin() + kFftLengthBy2, + e_extended.begin() + kFftLength, + std::begin(e_output_old_[0][ch])); + + // Apply suppression gain to upper bands. + for (int b = 1; b < e->NumBands(); ++b) { + auto e_band = e->View(b, ch); + for (size_t i = 0; i < kFftLengthBy2; ++i) { + e_band[i] *= high_bands_gain; + } + } + + // Add comfort noise to band 1. 
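// Worked example (illustrative, not from the upstream sources): the comfort
// noise gains above are the power complement of the suppression gain,
// g_cn = sqrt(1 - g^2), so suppressed signal plus injected noise keeps the
// band power roughly constant: (g*|E|)^2 + (g_cn*|N|)^2 = |E|^2 when |N| is
// shaped to match |E|. For instance g = 0.6 gives
// g_cn = sqrt(1 - 0.36) = 0.8.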
+ if (e->NumBands() > 1) { + E.Assign(comfort_noise_high_band[ch]); + std::array time_domain_high_band_noise; + fft_.Ifft(E, &time_domain_high_band_noise); + + auto e1 = e->View(/*band=*/1, ch); + const float gain = high_bands_noise_scaling * kIfftNormalization; + for (size_t i = 0; i < kFftLengthBy2; ++i) { + e1[i] += time_domain_high_band_noise[i] * gain; + } + } + + // Delay upper bands to match the delay of the filter bank. + for (int b = 1; b < e->NumBands(); ++b) { + auto e_band = e->View(b, ch); + float* e_band_old = e_output_old_[b][ch].data(); + for (size_t i = 0; i < kFftLengthBy2; ++i) { + std::swap(e_band[i], e_band_old[i]); + } + } + + // Clamp output of all bands. + for (int b = 0; b < e->NumBands(); ++b) { + auto e_band = e->View(b, ch); + for (size_t i = 0; i < kFftLengthBy2; ++i) { + e_band[i] = rtc::SafeClamp(e_band[i], -32768.f, 32767.f); + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.h b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.h new file mode 100644 index 0000000000..c18b2334bf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_FILTER_H_ + +#include +#include + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/block.h" +#include "modules/audio_processing/aec3/fft_data.h" + +namespace webrtc { + +class SuppressionFilter { + public: + SuppressionFilter(Aec3Optimization optimization, + int sample_rate_hz, + size_t num_capture_channels_); + ~SuppressionFilter(); + + SuppressionFilter(const SuppressionFilter&) = delete; + SuppressionFilter& operator=(const SuppressionFilter&) = delete; + + void ApplyGain(rtc::ArrayView comfort_noise, + rtc::ArrayView comfort_noise_high_bands, + const std::array& suppression_gain, + float high_bands_gain, + rtc::ArrayView E_lowest_band, + Block* e); + + private: + const Aec3Optimization optimization_; + const int sample_rate_hz_; + const size_t num_capture_channels_; + const Aec3Fft fft_; + std::vector>> e_output_old_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_FILTER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc new file mode 100644 index 0000000000..464f5cfed2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/suppression_filter.h" + +#include + +#include +#include +#include + +#include "test/gtest.h" + +namespace webrtc { +namespace { + +constexpr float kPi = 3.141592f; + +void ProduceSinusoid(int sample_rate_hz, + float sinusoidal_frequency_hz, + size_t* sample_counter, + Block* x) { + // Produce a sinusoid of the specified frequency. + for (size_t k = *sample_counter, j = 0; k < (*sample_counter + kBlockSize); + ++k, ++j) { + for (int channel = 0; channel < x->NumChannels(); ++channel) { + x->View(/*band=*/0, channel)[j] = + 32767.f * + std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz); + } + } + *sample_counter = *sample_counter + kBlockSize; + + for (int band = 1; band < x->NumBands(); ++band) { + for (int channel = 0; channel < x->NumChannels(); ++channel) { + std::fill(x->begin(band, channel), x->end(band, channel), 0.f); + } + } +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for null suppressor output. +TEST(SuppressionFilterDeathTest, NullOutput) { + std::vector cn(1); + std::vector cn_high_bands(1); + std::vector E(1); + std::array gain; + + EXPECT_DEATH(SuppressionFilter(Aec3Optimization::kNone, 16000, 1) + .ApplyGain(cn, cn_high_bands, gain, 1.0f, E, nullptr), + ""); +} + +// Verifies the check for allowed sample rate. +TEST(SuppressionFilterDeathTest, ProperSampleRate) { + EXPECT_DEATH(SuppressionFilter(Aec3Optimization::kNone, 16001, 1), ""); +} + +#endif + +// Verifies that no comfort noise is added when the gain is 1. +TEST(SuppressionFilter, ComfortNoiseInUnityGain) { + SuppressionFilter filter(Aec3Optimization::kNone, 48000, 1); + std::vector cn(1); + std::vector cn_high_bands(1); + std::array gain; + std::array e_old_; + Aec3Fft fft; + + e_old_.fill(0.f); + gain.fill(1.f); + cn[0].re.fill(1.f); + cn[0].im.fill(1.f); + cn_high_bands[0].re.fill(1.f); + cn_high_bands[0].im.fill(1.f); + + Block e(3, kBlockSize); + Block e_ref = e; + + std::vector E(1); + fft.PaddedFft(e.View(/*band=*/0, /*channel=*/0), e_old_, + Aec3Fft::Window::kSqrtHanning, &E[0]); + std::copy(e.begin(/*band=*/0, /*channel=*/0), + e.end(/*band=*/0, /*channel=*/0), e_old_.begin()); + + filter.ApplyGain(cn, cn_high_bands, gain, 1.f, E, &e); + + for (int band = 0; band < e.NumBands(); ++band) { + for (int channel = 0; channel < e.NumChannels(); ++channel) { + const auto e_view = e.View(band, channel); + const auto e_ref_view = e_ref.View(band, channel); + for (size_t sample = 0; sample < e_view.size(); ++sample) { + EXPECT_EQ(e_ref_view[sample], e_view[sample]); + } + } + } +} + +// Verifies that the suppressor is able to suppress a signal. 
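// Worked example (illustrative, not from the upstream sources): in the
// SignalSuppression test below the gain is unity for bins [0, 9] and zero
// above, while the probe tone is generated at
// 16000 * 40 / kFftLengthBy2 / 2 = 5000 Hz. With the 128-point FFT at the
// 16 kHz band rate the bin spacing is 125 Hz, so the tone lands in bin 40,
// deep inside the zero-gain region, and the output energy must collapse
// (asserted as e0_output < e0_input / 1000).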
+TEST(SuppressionFilter, SignalSuppression) { + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + constexpr size_t kNumChannels = 1; + + SuppressionFilter filter(Aec3Optimization::kNone, kSampleRateHz, 1); + std::vector cn(1); + std::vector cn_high_bands(1); + std::array e_old_; + Aec3Fft fft; + std::array gain; + Block e(kNumBands, kNumChannels); + e_old_.fill(0.f); + + gain.fill(1.f); + std::for_each(gain.begin() + 10, gain.end(), [](float& a) { a = 0.f; }); + + cn[0].re.fill(0.f); + cn[0].im.fill(0.f); + cn_high_bands[0].re.fill(0.f); + cn_high_bands[0].im.fill(0.f); + + size_t sample_counter = 0; + + float e0_input = 0.f; + float e0_output = 0.f; + for (size_t k = 0; k < 100; ++k) { + ProduceSinusoid(16000, 16000 * 40 / kFftLengthBy2 / 2, &sample_counter, &e); + e0_input = std::inner_product(e.begin(/*band=*/0, /*channel=*/0), + e.end(/*band=*/0, /*channel=*/0), + e.begin(/*band=*/0, /*channel=*/0), e0_input); + + std::vector E(1); + fft.PaddedFft(e.View(/*band=*/0, /*channel=*/0), e_old_, + Aec3Fft::Window::kSqrtHanning, &E[0]); + std::copy(e.begin(/*band=*/0, /*channel=*/0), + e.end(/*band=*/0, /*channel=*/0), e_old_.begin()); + + filter.ApplyGain(cn, cn_high_bands, gain, 1.f, E, &e); + e0_output = std::inner_product( + e.begin(/*band=*/0, /*channel=*/0), e.end(/*band=*/0, /*channel=*/0), + e.begin(/*band=*/0, /*channel=*/0), e0_output); + } + + EXPECT_LT(e0_output, e0_input / 1000.f); +} + +// Verifies that the suppressor is able to pass through a desired signal while +// applying suppressing for some frequencies. +TEST(SuppressionFilter, SignalTransparency) { + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + SuppressionFilter filter(Aec3Optimization::kNone, kSampleRateHz, 1); + std::vector cn(1); + std::array e_old_; + Aec3Fft fft; + std::vector cn_high_bands(1); + std::array gain; + Block e(kNumBands, kNumChannels); + e_old_.fill(0.f); + gain.fill(1.f); + std::for_each(gain.begin() + 30, gain.end(), [](float& a) { a = 0.f; }); + + cn[0].re.fill(0.f); + cn[0].im.fill(0.f); + cn_high_bands[0].re.fill(0.f); + cn_high_bands[0].im.fill(0.f); + + size_t sample_counter = 0; + + float e0_input = 0.f; + float e0_output = 0.f; + for (size_t k = 0; k < 100; ++k) { + ProduceSinusoid(16000, 16000 * 10 / kFftLengthBy2 / 2, &sample_counter, &e); + e0_input = std::inner_product(e.begin(/*band=*/0, /*channel=*/0), + e.end(/*band=*/0, /*channel=*/0), + e.begin(/*band=*/0, /*channel=*/0), e0_input); + + std::vector E(1); + fft.PaddedFft(e.View(/*band=*/0, /*channel=*/0), e_old_, + Aec3Fft::Window::kSqrtHanning, &E[0]); + std::copy(e.begin(/*band=*/0, /*channel=*/0), + e.end(/*band=*/0, /*channel=*/0), e_old_.begin()); + + filter.ApplyGain(cn, cn_high_bands, gain, 1.f, E, &e); + e0_output = std::inner_product( + e.begin(/*band=*/0, /*channel=*/0), e.end(/*band=*/0, /*channel=*/0), + e.begin(/*band=*/0, /*channel=*/0), e0_output); + } + + EXPECT_LT(0.9f * e0_input, e0_output); +} + +// Verifies that the suppressor delay. 
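A note on the tone frequencies used by the two tests above: ProduceSinusoid is called with a 16 kHz rate because band 0 of the 48 kHz stream runs at 16 kHz, so with the usual kFftLengthBy2 of 64, bin k of the 128-point FFT sits at k * 16000 / 128 Hz. The hypothetical helper below (illustration only) makes the arithmetic explicit: the 5 kHz tone falls well inside a gain mask that is zeroed from bin 10 (1250 Hz) upward, while the 1250 Hz tone lies below a mask that only starts at bin 30 (3750 Hz):

#include <cstdio>

// Hypothetical helper, not part of the sources: center frequency of bin k for
// a 128-point FFT on the 16 kHz lower band.
constexpr float BinToHz(int k) {
  return k * 16000.f / 128.f;
}

int main() {
  // SignalSuppression: tone at bin 40, gains zeroed from bin 10 upward.
  std::printf("suppression:  tone %.0f Hz, mask starts at %.0f Hz\n",
              BinToHz(40), BinToHz(10));
  // SignalTransparency: tone at bin 10, gains zeroed from bin 30 upward.
  std::printf("transparency: tone %.0f Hz, mask starts at %.0f Hz\n",
              BinToHz(10), BinToHz(30));
  return 0;
}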
+TEST(SuppressionFilter, Delay) { + constexpr size_t kNumChannels = 1; + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + + SuppressionFilter filter(Aec3Optimization::kNone, kSampleRateHz, 1); + std::vector cn(1); + std::vector cn_high_bands(1); + std::array e_old_; + Aec3Fft fft; + std::array gain; + Block e(kNumBands, kNumChannels); + + gain.fill(1.f); + + cn[0].re.fill(0.f); + cn[0].im.fill(0.f); + cn_high_bands[0].re.fill(0.f); + cn_high_bands[0].im.fill(0.f); + + for (size_t k = 0; k < 100; ++k) { + for (size_t band = 0; band < kNumBands; ++band) { + for (size_t channel = 0; channel < kNumChannels; ++channel) { + auto e_view = e.View(band, channel); + for (size_t sample = 0; sample < kBlockSize; ++sample) { + e_view[sample] = k * kBlockSize + sample + channel; + } + } + } + + std::vector E(1); + fft.PaddedFft(e.View(/*band=*/0, /*channel=*/0), e_old_, + Aec3Fft::Window::kSqrtHanning, &E[0]); + std::copy(e.begin(/*band=*/0, /*channel=*/0), + e.end(/*band=*/0, /*channel=*/0), e_old_.begin()); + + filter.ApplyGain(cn, cn_high_bands, gain, 1.f, E, &e); + if (k > 2) { + for (size_t band = 0; band < kNumBands; ++band) { + for (size_t channel = 0; channel < kNumChannels; ++channel) { + const auto e_view = e.View(band, channel); + for (size_t sample = 0; sample < kBlockSize; ++sample) { + EXPECT_NEAR(k * kBlockSize + sample - kBlockSize + channel, + e_view[sample], 0.01); + } + } + } + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc new file mode 100644 index 0000000000..037dabaabe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.cc @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/suppression_gain.h" + +#include +#include + +#include +#include + +#include "modules/audio_processing/aec3/dominant_nearend_detector.h" +#include "modules/audio_processing/aec3/moving_average.h" +#include "modules/audio_processing/aec3/subband_nearend_detector.h" +#include "modules/audio_processing/aec3/vector_math.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +void LimitLowFrequencyGains(std::array* gain) { + // Limit the low frequency gains to avoid the impact of the high-pass filter + // on the lower-frequency gain influencing the overall achieved gain. + (*gain)[0] = (*gain)[1] = std::min((*gain)[1], (*gain)[2]); +} + +void LimitHighFrequencyGains(bool conservative_hf_suppression, + std::array* gain) { + // Limit the high frequency gains to avoid echo leakage due to an imperfect + // filter. 
+ constexpr size_t kFirstBandToLimit = (64 * 2000) / 8000; + const float min_upper_gain = (*gain)[kFirstBandToLimit]; + std::for_each( + gain->begin() + kFirstBandToLimit + 1, gain->end(), + [min_upper_gain](float& a) { a = std::min(a, min_upper_gain); }); + (*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1]; + + if (conservative_hf_suppression) { + // Limits the gain in the frequencies for which the adaptive filter has not + // converged. + // TODO(peah): Make adaptive to take the actual filter error into account. + constexpr size_t kUpperAccurateBandPlus1 = 29; + + constexpr float oneByBandsInSum = + 1 / static_cast(kUpperAccurateBandPlus1 - 20); + const float hf_gain_bound = + std::accumulate(gain->begin() + 20, + gain->begin() + kUpperAccurateBandPlus1, 0.f) * + oneByBandsInSum; + + std::for_each( + gain->begin() + kUpperAccurateBandPlus1, gain->end(), + [hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); }); + } +} + +// Scales the echo according to assessed audibility at the other end. +void WeightEchoForAudibility(const EchoCanceller3Config& config, + rtc::ArrayView echo, + rtc::ArrayView weighted_echo) { + RTC_DCHECK_EQ(kFftLengthBy2Plus1, echo.size()); + RTC_DCHECK_EQ(kFftLengthBy2Plus1, weighted_echo.size()); + + auto weigh = [](float threshold, float normalizer, size_t begin, size_t end, + rtc::ArrayView echo, + rtc::ArrayView weighted_echo) { + for (size_t k = begin; k < end; ++k) { + if (echo[k] < threshold) { + float tmp = (threshold - echo[k]) * normalizer; + weighted_echo[k] = echo[k] * std::max(0.f, 1.f - tmp * tmp); + } else { + weighted_echo[k] = echo[k]; + } + } + }; + + float threshold = config.echo_audibility.floor_power * + config.echo_audibility.audibility_threshold_lf; + float normalizer = 1.f / (threshold - config.echo_audibility.floor_power); + weigh(threshold, normalizer, 0, 3, echo, weighted_echo); + + threshold = config.echo_audibility.floor_power * + config.echo_audibility.audibility_threshold_mf; + normalizer = 1.f / (threshold - config.echo_audibility.floor_power); + weigh(threshold, normalizer, 3, 7, echo, weighted_echo); + + threshold = config.echo_audibility.floor_power * + config.echo_audibility.audibility_threshold_hf; + normalizer = 1.f / (threshold - config.echo_audibility.floor_power); + weigh(threshold, normalizer, 7, kFftLengthBy2Plus1, echo, weighted_echo); +} + +} // namespace + +std::atomic SuppressionGain::instance_count_(0); + +float SuppressionGain::UpperBandsGain( + rtc::ArrayView> echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + const absl::optional& narrow_peak_band, + bool saturated_echo, + const Block& render, + const std::array& low_band_gain) const { + RTC_DCHECK_LT(0, render.NumBands()); + if (render.NumBands() == 1) { + return 1.f; + } + const int num_render_channels = render.NumChannels(); + + if (narrow_peak_band && + (*narrow_peak_band > static_cast(kFftLengthBy2Plus1 - 10))) { + return 0.001f; + } + + constexpr size_t kLowBandGainLimit = kFftLengthBy2 / 2; + const float gain_below_8_khz = *std::min_element( + low_band_gain.begin() + kLowBandGainLimit, low_band_gain.end()); + + // Always attenuate the upper bands when there is saturated echo. + if (saturated_echo) { + return std::min(0.001f, gain_below_8_khz); + } + + // Compute the upper and lower band energies. 
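WeightEchoForAudibility above applies a soft knee rather than a hard gate: below the per-region threshold the echo is scaled by max(0, 1 - t^2) with t = (threshold - echo) * normalizer, which fades from full weight at the threshold down to zero at the floor power. The sketch below replays that rule with made-up numbers standing in for the config values:

#include <algorithm>
#include <cstdio>

// Soft audibility knee as in WeightEchoForAudibility; the numbers are
// illustrative, not the defaults from EchoCanceller3Config.
int main() {
  const float floor_power = 2.f * 64.f;         // Assumed floor power.
  const float threshold = floor_power * 100.f;  // floor * audibility threshold.
  const float normalizer = 1.f / (threshold - floor_power);
  for (float echo : {floor_power, 0.25f * threshold, 0.5f * threshold,
                     threshold, 2.f * threshold}) {
    float weighted;
    if (echo < threshold) {
      const float tmp = (threshold - echo) * normalizer;
      weighted = echo * std::max(0.f, 1.f - tmp * tmp);
    } else {
      weighted = echo;  // At or above the threshold: full weight.
    }
    std::printf("echo=%8.1f -> weighted=%8.1f\n", echo, weighted);
  }
  return 0;
}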
+ const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + float low_band_energy = 0.f; + for (int ch = 0; ch < num_render_channels; ++ch) { + const float channel_energy = + std::accumulate(render.begin(/*band=*/0, ch), + render.end(/*band=*/0, ch), 0.0f, sum_of_squares); + low_band_energy = std::max(low_band_energy, channel_energy); + } + float high_band_energy = 0.f; + for (int k = 1; k < render.NumBands(); ++k) { + for (int ch = 0; ch < num_render_channels; ++ch) { + const float energy = std::accumulate( + render.begin(k, ch), render.end(k, ch), 0.f, sum_of_squares); + high_band_energy = std::max(high_band_energy, energy); + } + } + + // If there is more power in the lower frequencies than the upper frequencies, + // or if the power in upper frequencies is low, do not bound the gain in the + // upper bands. + float anti_howling_gain; + const float activation_threshold = + kBlockSize * config_.suppressor.high_bands_suppression + .anti_howling_activation_threshold; + if (high_band_energy < std::max(low_band_energy, activation_threshold)) { + anti_howling_gain = 1.f; + } else { + // In all other cases, bound the gain for upper frequencies. + RTC_DCHECK_LE(low_band_energy, high_band_energy); + RTC_DCHECK_NE(0.f, high_band_energy); + anti_howling_gain = + config_.suppressor.high_bands_suppression.anti_howling_gain * + sqrtf(low_band_energy / high_band_energy); + } + + float gain_bound = 1.f; + if (!dominant_nearend_detector_->IsNearendState()) { + // Bound the upper gain during significant echo activity. + const auto& cfg = config_.suppressor.high_bands_suppression; + auto low_frequency_energy = [](rtc::ArrayView spectrum) { + RTC_DCHECK_LE(16, spectrum.size()); + return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f); + }; + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + const float echo_sum = low_frequency_energy(echo_spectrum[ch]); + const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]); + if (echo_sum > cfg.enr_threshold * noise_sum) { + gain_bound = cfg.max_gain_during_echo; + break; + } + } + } + + // Choose the gain as the minimum of the lower and upper gains. + return std::min(std::min(gain_below_8_khz, anti_howling_gain), gain_bound); +} + +// Computes the gain to reduce the echo to a non audible level. +void SuppressionGain::GainToNoAudibleEcho( + const std::array& nearend, + const std::array& echo, + const std::array& masker, + std::array* gain) const { + const auto& p = dominant_nearend_detector_->IsNearendState() ? nearend_params_ + : normal_params_; + for (size_t k = 0; k < gain->size(); ++k) { + float enr = echo[k] / (nearend[k] + 1.f); // Echo-to-nearend ratio. + float emr = echo[k] / (masker[k] + 1.f); // Echo-to-masker (noise) ratio. + float g = 1.0f; + if (enr > p.enr_transparent_[k] && emr > p.emr_transparent_[k]) { + g = (p.enr_suppress_[k] - enr) / + (p.enr_suppress_[k] - p.enr_transparent_[k]); + g = std::max(g, p.emr_transparent_[k] / emr); + } + (*gain)[k] = g; + } +} + +// Compute the minimum gain as the attenuating gain to put the signal just +// above the zero sample values. +void SuppressionGain::GetMinGain( + rtc::ArrayView weighted_residual_echo, + rtc::ArrayView last_nearend, + rtc::ArrayView last_echo, + bool low_noise_render, + bool saturated_echo, + rtc::ArrayView min_gain) const { + if (!saturated_echo) { + const float min_echo_power = + low_noise_render ? 
config_.echo_audibility.low_render_limit + : config_.echo_audibility.normal_render_limit; + + for (size_t k = 0; k < min_gain.size(); ++k) { + min_gain[k] = weighted_residual_echo[k] > 0.f + ? min_echo_power / weighted_residual_echo[k] + : 1.f; + min_gain[k] = std::min(min_gain[k], 1.f); + } + + if (!initial_state_ || + config_.suppressor.lf_smoothing_during_initial_phase) { + const float& dec = dominant_nearend_detector_->IsNearendState() + ? nearend_params_.max_dec_factor_lf + : normal_params_.max_dec_factor_lf; + + for (int k = 0; k <= config_.suppressor.last_lf_smoothing_band; ++k) { + // Make sure the gains of the low frequencies do not decrease too + // quickly after strong nearend. + if (last_nearend[k] > last_echo[k] || + k <= config_.suppressor.last_permanent_lf_smoothing_band) { + min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec); + min_gain[k] = std::min(min_gain[k], 1.f); + } + } + } + } else { + std::fill(min_gain.begin(), min_gain.end(), 0.f); + } +} + +// Compute the maximum gain by limiting the gain increase from the previous +// gain. +void SuppressionGain::GetMaxGain(rtc::ArrayView max_gain) const { + const auto& inc = dominant_nearend_detector_->IsNearendState() + ? nearend_params_.max_inc_factor + : normal_params_.max_inc_factor; + const auto& floor = config_.suppressor.floor_first_increase; + for (size_t k = 0; k < max_gain.size(); ++k) { + max_gain[k] = std::min(std::max(last_gain_[k] * inc, floor), 1.f); + } +} + +void SuppressionGain::LowerBandGain( + bool low_noise_render, + const AecState& aec_state, + rtc::ArrayView> + suppressor_input, + rtc::ArrayView> residual_echo, + rtc::ArrayView> comfort_noise, + bool clock_drift, + std::array* gain) { + gain->fill(1.f); + const bool saturated_echo = aec_state.SaturatedEcho(); + std::array max_gain; + GetMaxGain(max_gain); + + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { + std::array G; + std::array nearend; + nearend_smoothers_[ch].Average(suppressor_input[ch], nearend); + + // Weight echo power in terms of audibility. + std::array weighted_residual_echo; + WeightEchoForAudibility(config_, residual_echo[ch], weighted_residual_echo); + + std::array min_gain; + GetMinGain(weighted_residual_echo, last_nearend_[ch], last_echo_[ch], + low_noise_render, saturated_echo, min_gain); + + GainToNoAudibleEcho(nearend, weighted_residual_echo, comfort_noise[0], &G); + + // Clamp gains. + for (size_t k = 0; k < gain->size(); ++k) { + G[k] = std::max(std::min(G[k], max_gain[k]), min_gain[k]); + (*gain)[k] = std::min((*gain)[k], G[k]); + } + + // Store data required for the gain computation of the next block. + std::copy(nearend.begin(), nearend.end(), last_nearend_[ch].begin()); + std::copy(weighted_residual_echo.begin(), weighted_residual_echo.end(), + last_echo_[ch].begin()); + } + + LimitLowFrequencyGains(gain); + // Use conservative high-frequency gains during clock-drift or when not in + // dominant nearend. + if (!dominant_nearend_detector_->IsNearendState() || clock_drift || + config_.suppressor.conservative_hf_suppression) { + LimitHighFrequencyGains(config_.suppressor.conservative_hf_suppression, + gain); + } + + // Store computed gains. + std::copy(gain->begin(), gain->end(), last_gain_.begin()); + + // Transform gains to amplitude domain. 
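Note that LowerBandGain above operates on powers throughout: GainToNoAudibleEcho maps the echo-to-nearend ratio linearly down to zero between the transparent and suppress thresholds, the result is clamped between the per-band minimum and maximum gains, and only the final Sqrt below converts the power gain into an amplitude gain. A compact sketch of the per-bin rule, with hypothetical threshold values in place of the tuning masks:

#include <algorithm>
#include <cstdio>

// Per-bin gain rule as in GainToNoAudibleEcho; thresholds are hypothetical.
float GainForBin(float nearend, float echo, float masker) {
  const float enr_transparent = 0.3f;  // Assumed threshold.
  const float enr_suppress = 0.4f;     // Assumed threshold.
  const float emr_transparent = 0.3f;  // Assumed threshold.
  const float enr = echo / (nearend + 1.f);  // Echo-to-nearend ratio.
  const float emr = echo / (masker + 1.f);   // Echo-to-masker ratio.
  float g = 1.0f;
  if (enr > enr_transparent && emr > emr_transparent) {
    g = (enr_suppress - enr) / (enr_suppress - enr_transparent);
    g = std::max(g, emr_transparent / emr);
  }
  return std::min(std::max(g, 0.f), 1.f);  // Stand-in for the min/max clamp.
}

int main() {
  std::printf("no echo:     g=%.2f\n", GainForBin(100.f, 1.f, 10.f));
  std::printf("echo ~ near: g=%.2f\n", GainForBin(100.f, 35.f, 10.f));
  std::printf("strong echo: g=%.2f\n", GainForBin(100.f, 1000.f, 10.f));
  return 0;
}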
+ aec3::VectorMath(optimization_).Sqrt(*gain); +} + +SuppressionGain::SuppressionGain(const EchoCanceller3Config& config, + Aec3Optimization optimization, + int sample_rate_hz, + size_t num_capture_channels) + : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), + optimization_(optimization), + config_(config), + num_capture_channels_(num_capture_channels), + state_change_duration_blocks_( + static_cast(config_.filter.config_change_duration_blocks)), + last_nearend_(num_capture_channels_, {0}), + last_echo_(num_capture_channels_, {0}), + nearend_smoothers_( + num_capture_channels_, + aec3::MovingAverage(kFftLengthBy2Plus1, + config.suppressor.nearend_average_blocks)), + nearend_params_(config_.suppressor.last_lf_band, + config_.suppressor.first_hf_band, + config_.suppressor.nearend_tuning), + normal_params_(config_.suppressor.last_lf_band, + config_.suppressor.first_hf_band, + config_.suppressor.normal_tuning), + use_unbounded_echo_spectrum_(config.suppressor.dominant_nearend_detection + .use_unbounded_echo_spectrum) { + RTC_DCHECK_LT(0, state_change_duration_blocks_); + last_gain_.fill(1.f); + if (config_.suppressor.use_subband_nearend_detection) { + dominant_nearend_detector_ = std::make_unique( + config_.suppressor.subband_nearend_detection, num_capture_channels_); + } else { + dominant_nearend_detector_ = std::make_unique( + config_.suppressor.dominant_nearend_detection, num_capture_channels_); + } + RTC_DCHECK(dominant_nearend_detector_); +} + +SuppressionGain::~SuppressionGain() = default; + +void SuppressionGain::GetGain( + rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> echo_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + residual_echo_spectrum_unbounded, + rtc::ArrayView> + comfort_noise_spectrum, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + const Block& render, + bool clock_drift, + float* high_bands_gain, + std::array* low_band_gain) { + RTC_DCHECK(high_bands_gain); + RTC_DCHECK(low_band_gain); + + // Choose residual echo spectrum for dominant nearend detection. + const auto echo = use_unbounded_echo_spectrum_ + ? residual_echo_spectrum_unbounded + : residual_echo_spectrum; + + // Update the nearend state selection. + dominant_nearend_detector_->Update(nearend_spectrum, echo, + comfort_noise_spectrum, initial_state_); + + // Compute gain for the lower band. + bool low_noise_render = low_render_detector_.Detect(render); + LowerBandGain(low_noise_render, aec_state, nearend_spectrum, + residual_echo_spectrum, comfort_noise_spectrum, clock_drift, + low_band_gain); + + // Compute the gain for the upper bands. + const absl::optional narrow_peak_band = + render_signal_analyzer.NarrowPeakBand(); + + *high_bands_gain = + UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band, + aec_state.SaturatedEcho(), render, *low_band_gain); + + data_dumper_->DumpRaw("aec3_dominant_nearend", + dominant_nearend_detector_->IsNearendState()); +} + +void SuppressionGain::SetInitialState(bool state) { + initial_state_ = state; + if (state) { + initial_state_change_counter_ = state_change_duration_blocks_; + } else { + initial_state_change_counter_ = 0; + } +} + +// Detects when the render signal can be considered to have low power and +// consist of stationary noise. 
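The detector implemented next smooths the render power with a one-pole average, average_power_ = 0.9 * previous + 0.1 * current, which reaches about 65% of a step input after ten blocks (1 - 0.9^10). The sketch below traces that step response, assuming the usual 64-sample blocks at 16 kHz so that one block is 4 ms:

#include <cstdio>

int main() {
  // One-pole average as in LowNoiseRenderDetector::Detect below.
  float avg = 0.f;
  const float x = 1.f;  // Unit step input.
  for (int block = 1; block <= 30; ++block) {
    avg = 0.9f * avg + 0.1f * x;
    if (block == 5 || block == 10 || block == 20 || block == 30) {
      // Assuming 64-sample blocks at 16 kHz, one block is 4 ms.
      std::printf("block %2d (%3d ms): avg=%.3f\n", block, block * 4, avg);
    }
  }
  return 0;
}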
+bool SuppressionGain::LowNoiseRenderDetector::Detect(const Block& render) { + float x2_sum = 0.f; + float x2_max = 0.f; + for (int ch = 0; ch < render.NumChannels(); ++ch) { + for (float x_k : render.View(/*band=*/0, ch)) { + const float x2 = x_k * x_k; + x2_sum += x2; + x2_max = std::max(x2_max, x2); + } + } + x2_sum = x2_sum / render.NumChannels(); + + constexpr float kThreshold = 50.f * 50.f * 64.f; + const bool low_noise_render = + average_power_ < kThreshold && x2_max < 3 * average_power_; + average_power_ = average_power_ * 0.9f + x2_sum * 0.1f; + return low_noise_render; +} + +SuppressionGain::GainParameters::GainParameters( + int last_lf_band, + int first_hf_band, + const EchoCanceller3Config::Suppressor::Tuning& tuning) + : max_inc_factor(tuning.max_inc_factor), + max_dec_factor_lf(tuning.max_dec_factor_lf) { + // Compute per-band masking thresholds. + RTC_DCHECK_LT(last_lf_band, first_hf_band); + auto& lf = tuning.mask_lf; + auto& hf = tuning.mask_hf; + RTC_DCHECK_LT(lf.enr_transparent, lf.enr_suppress); + RTC_DCHECK_LT(hf.enr_transparent, hf.enr_suppress); + for (int k = 0; k < static_cast(kFftLengthBy2Plus1); k++) { + float a; + if (k <= last_lf_band) { + a = 0.f; + } else if (k < first_hf_band) { + a = (k - last_lf_band) / static_cast(first_hf_band - last_lf_band); + } else { + a = 1.f; + } + enr_transparent_[k] = (1 - a) * lf.enr_transparent + a * hf.enr_transparent; + enr_suppress_[k] = (1 - a) * lf.enr_suppress + a * hf.enr_suppress; + emr_transparent_[k] = (1 - a) * lf.emr_transparent + a * hf.emr_transparent; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.h b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.h new file mode 100644 index 0000000000..c19ddd7e30 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_ + +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/aec3/moving_average.h" +#include "modules/audio_processing/aec3/nearend_detector.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +class SuppressionGain { + public: + SuppressionGain(const EchoCanceller3Config& config, + Aec3Optimization optimization, + int sample_rate_hz, + size_t num_capture_channels); + ~SuppressionGain(); + + SuppressionGain(const SuppressionGain&) = delete; + SuppressionGain& operator=(const SuppressionGain&) = delete; + + void GetGain( + rtc::ArrayView> + nearend_spectrum, + rtc::ArrayView> echo_spectrum, + rtc::ArrayView> + residual_echo_spectrum, + rtc::ArrayView> + residual_echo_spectrum_unbounded, + rtc::ArrayView> + comfort_noise_spectrum, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + const Block& render, + bool clock_drift, + float* high_bands_gain, + std::array* low_band_gain); + + bool IsDominantNearend() { + return dominant_nearend_detector_->IsNearendState(); + } + + // Toggles the usage of the initial state. + void SetInitialState(bool state); + + private: + // Computes the gain to apply for the bands beyond the first band. + float UpperBandsGain( + rtc::ArrayView> echo_spectrum, + rtc::ArrayView> + comfort_noise_spectrum, + const absl::optional& narrow_peak_band, + bool saturated_echo, + const Block& render, + const std::array& low_band_gain) const; + + void GainToNoAudibleEcho(const std::array& nearend, + const std::array& echo, + const std::array& masker, + std::array* gain) const; + + void LowerBandGain( + bool stationary_with_low_power, + const AecState& aec_state, + rtc::ArrayView> + suppressor_input, + rtc::ArrayView> residual_echo, + rtc::ArrayView> comfort_noise, + bool clock_drift, + std::array* gain); + + void GetMinGain(rtc::ArrayView weighted_residual_echo, + rtc::ArrayView last_nearend, + rtc::ArrayView last_echo, + bool low_noise_render, + bool saturated_echo, + rtc::ArrayView min_gain) const; + + void GetMaxGain(rtc::ArrayView max_gain) const; + + class LowNoiseRenderDetector { + public: + bool Detect(const Block& render); + + private: + float average_power_ = 32768.f * 32768.f; + }; + + struct GainParameters { + explicit GainParameters( + int last_lf_band, + int first_hf_band, + const EchoCanceller3Config::Suppressor::Tuning& tuning); + const float max_inc_factor; + const float max_dec_factor_lf; + std::array enr_transparent_; + std::array enr_suppress_; + std::array emr_transparent_; + }; + + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + const Aec3Optimization optimization_; + const EchoCanceller3Config config_; + const size_t num_capture_channels_; + const int state_change_duration_blocks_; + std::array last_gain_; + std::vector> last_nearend_; + std::vector> last_echo_; + LowNoiseRenderDetector low_render_detector_; + bool initial_state_ = true; + int initial_state_change_counter_ = 0; + std::vector nearend_smoothers_; + const GainParameters nearend_params_; + const GainParameters normal_params_; + 
// Determines if the dominant nearend detector uses the unbounded residual + // echo spectrum. + const bool use_unbounded_echo_spectrum_; + std::unique_ptr dominant_nearend_detector_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc new file mode 100644 index 0000000000..02de706c77 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/suppression_gain.h" + +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/subtractor.h" +#include "modules/audio_processing/aec3/subtractor_output.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" + +namespace webrtc { +namespace aec3 { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null output gains works. +TEST(SuppressionGainDeathTest, NullOutputGains) { + std::vector> E2(1, {0.0f}); + std::vector> R2(1, {0.0f}); + std::vector> R2_unbounded(1, {0.0f}); + std::vector> S2(1); + std::vector> N2(1, {0.0f}); + for (auto& S2_k : S2) { + S2_k.fill(0.1f); + } + FftData E; + FftData Y; + E.re.fill(0.0f); + E.im.fill(0.0f); + Y.re.fill(0.0f); + Y.im.fill(0.0f); + + float high_bands_gain; + AecState aec_state(EchoCanceller3Config{}, 1); + EXPECT_DEATH( + SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000, 1) + .GetGain(E2, S2, R2, R2_unbounded, N2, + RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state, + Block(3, 1), false, &high_bands_gain, nullptr), + ""); +} + +#endif + +// Does a sanity check that the gains are correctly computed. 
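The GainParameters constructor earlier in suppression_gain.cc derives the per-band thresholds by crossfading between the low- and high-frequency tuning masks: the blend factor is 0 up to last_lf_band, rises linearly until first_hf_band, and is 1 above it. The sketch below uses hypothetical band limits and mask values (the real ones come from EchoCanceller3Config::Suppressor):

#include <cstdio>

int main() {
  const int last_lf_band = 5;   // Hypothetical.
  const int first_hf_band = 8;  // Hypothetical.
  const float lf_mask = 0.3f;   // e.g. enr_transparent of the LF mask.
  const float hf_mask = 0.07f;  // e.g. enr_transparent of the HF mask.
  for (int k = 0; k <= 10; ++k) {
    float a;
    if (k <= last_lf_band) {
      a = 0.f;
    } else if (k < first_hf_band) {
      a = (k - last_lf_band) /
          static_cast<float>(first_hf_band - last_lf_band);
    } else {
      a = 1.f;
    }
    std::printf("band %2d: a=%.2f threshold=%.3f\n", k, a,
                (1 - a) * lf_mask + a * hf_mask);
  }
  return 0;
}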
+TEST(SuppressionGain, BasicGainComputation) { + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 2; + constexpr int kSampleRateHz = 16000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + SuppressionGain suppression_gain(EchoCanceller3Config(), DetectOptimization(), + kSampleRateHz, kNumCaptureChannels); + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + float high_bands_gain; + std::vector> E2(kNumCaptureChannels); + std::vector> S2(kNumCaptureChannels, + {0.0f}); + std::vector> Y2(kNumCaptureChannels); + std::vector> R2(kNumCaptureChannels); + std::vector> R2_unbounded( + kNumCaptureChannels); + std::vector> N2(kNumCaptureChannels); + std::array g; + std::vector output(kNumCaptureChannels); + Block x(kNumBands, kNumRenderChannels); + EchoCanceller3Config config; + AecState aec_state(config, kNumCaptureChannels); + ApmDataDumper data_dumper(42); + Subtractor subtractor(config, kNumRenderChannels, kNumCaptureChannels, + &data_dumper, DetectOptimization()); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels)); + absl::optional delay_estimate; + + // Ensure that a strong noise is detected to mask any echoes. + for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) { + E2[ch].fill(10.f); + Y2[ch].fill(10.f); + R2[ch].fill(0.1f); + R2_unbounded[ch].fill(0.1f); + N2[ch].fill(100.0f); + } + for (auto& subtractor_output : output) { + subtractor_output.Reset(); + } + + // Ensure that the gain is no longer forced to zero. + for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) { + aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(), + subtractor.FilterImpulseResponses(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, output); + } + + for (int k = 0; k < 100; ++k) { + aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(), + subtractor.FilterImpulseResponses(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, output); + suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state, + x, false, &high_bands_gain, &g); + } + std::for_each(g.begin(), g.end(), + [](float a) { EXPECT_NEAR(1.0f, a, 0.001f); }); + + // Ensure that a strong nearend is detected to mask any echoes. + for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) { + E2[ch].fill(100.f); + Y2[ch].fill(100.f); + R2[ch].fill(0.1f); + R2_unbounded[ch].fill(0.1f); + S2[ch].fill(0.1f); + N2[ch].fill(0.f); + } + + for (int k = 0; k < 100; ++k) { + aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(), + subtractor.FilterImpulseResponses(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, output); + suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state, + x, false, &high_bands_gain, &g); + } + std::for_each(g.begin(), g.end(), + [](float a) { EXPECT_NEAR(1.0f, a, 0.001f); }); + + // Add a strong echo to one of the channels and ensure that it is suppressed. 
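For the final stage of this test, continued below, the chosen powers make the outcome unambiguous: a residual echo of 1e13 against a nearend of 1e9 gives an echo-to-nearend ratio near 1e4, far above any plausible suppress threshold, so the gain is driven to its minimum. A one-line check of the order of magnitude:

#include <cstdio>

int main() {
  const float E2 = 1000000000.0f;      // Nearend power used in the test.
  const float R2 = 10000000000000.0f;  // Residual echo power used in the test.
  std::printf("enr = %g\n", R2 / (E2 + 1.f));  // ~1e4, deep in suppression.
  return 0;
}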
+ E2[1].fill(1000000000.0f); + R2[1].fill(10000000000000.0f); + R2_unbounded[1].fill(10000000000000.0f); + + for (int k = 0; k < 10; ++k) { + suppression_gain.GetGain(E2, S2, R2, R2_unbounded, N2, analyzer, aec_state, + x, false, &high_bands_gain, &g); + } + std::for_each(g.begin(), g.end(), + [](float a) { EXPECT_NEAR(0.0f, a, 0.001f); }); +} + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc b/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc new file mode 100644 index 0000000000..489f53f4f1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.cc @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/transparent_mode.h" + +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +constexpr size_t kBlocksSinceConvergencedFilterInit = 10000; +constexpr size_t kBlocksSinceConsistentEstimateInit = 10000; + +bool DeactivateTransparentMode() { + return field_trial::IsEnabled("WebRTC-Aec3TransparentModeKillSwitch"); +} + +bool ActivateTransparentModeHmm() { + return field_trial::IsEnabled("WebRTC-Aec3TransparentModeHmm"); +} + +} // namespace + +// Classifier that toggles transparent mode which reduces echo suppression when +// headsets are used. +class TransparentModeImpl : public TransparentMode { + public: + bool Active() const override { return transparency_activated_; } + + void Reset() override { + // Determines if transparent mode is used. + transparency_activated_ = false; + + // The estimated probability of being transparent mode. + prob_transparent_state_ = 0.f; + } + + void Update(int filter_delay_blocks, + bool any_filter_consistent, + bool any_filter_converged, + bool any_coarse_filter_converged, + bool all_filters_diverged, + bool active_render, + bool saturated_capture) override { + // The classifier is implemented as a Hidden Markov Model (HMM) with two + // hidden states: "normal" and "transparent". The estimated probabilities of + // the two states are updated by observing filter convergence during active + // render. The filters are less likely to be reported as converged when + // there is no echo present in the microphone signal. + + // The constants have been obtained by observing active_render and + // any_coarse_filter_converged under varying call scenarios. They + // have further been hand tuned to prefer normal state during uncertain + // regions (to avoid echo leaks). + + // The model is only updated during active render. + if (!active_render) + return; + + // Probability of switching from one state to the other. + constexpr float kSwitch = 0.000001f; + + // Probability of observing converged filters in states "normal" and + // "transparent" during active render. + constexpr float kConvergedNormal = 0.01f; + constexpr float kConvergedTransparent = 0.001f; + + // Probability of transitioning to transparent state from normal state and + // transparent state respectively. 
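The constants and update steps that follow are a textbook two-state forward (filtering) recursion: predict with the transition probabilities, weight each state by the likelihood of the observed convergence flag, then renormalize. The self-contained sketch below reproduces the recursion with the same constants and shows both the slow drift toward the transparent state and the much faster recovery:

#include <cstdio>

// Two-state forward recursion as in TransparentModeImpl::Update below;
// state 0 is "normal", state 1 is "transparent", and the observation is
// whether any coarse filter converged during the block.
float UpdateTransparentProb(float prob_transparent, bool converged) {
  constexpr float kSwitch = 0.000001f;
  constexpr float kConvergedNormal = 0.01f;
  constexpr float kConvergedTransparent = 0.001f;
  const float kA[2] = {kSwitch, 1.f - kSwitch};
  const float kB[2][2] = {
      {1.f - kConvergedNormal, kConvergedNormal},
      {1.f - kConvergedTransparent, kConvergedTransparent}};
  const float prob_normal = 1.f - prob_transparent;
  // Predict with the transition probabilities.
  const float pred_transparent =
      prob_normal * kA[0] + prob_transparent * kA[1];
  const float pred_normal = 1.f - pred_transparent;
  // Weight by the likelihood of the observation and renormalize.
  const int out = converged ? 1 : 0;
  const float joint_normal = pred_normal * kB[0][out];
  const float joint_transparent = pred_transparent * kB[1][out];
  return joint_transparent / (joint_normal + joint_transparent);
}

int main() {
  float p = 0.f;
  for (int block = 1; block <= 4000; ++block) {
    p = UpdateTransparentProb(p, /*converged=*/false);
    if (block % 1000 == 0) std::printf("block %4d: p=%.4f\n", block, p);
  }
  // Converged observations divide the transparent-to-normal odds by roughly
  // ten per block, so deactivation (p < 0.5) takes several of them.
  int n = 0;
  while (p > 0.5f) {
    p = UpdateTransparentProb(p, /*converged=*/true);
    ++n;
  }
  std::printf("converged blocks until p < 0.5: %d\n", n);
  return 0;
}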
+ constexpr float kA[2] = {kSwitch, 1.f - kSwitch}; + + // Probability of the two observations (converged filter or not converged + // filter) in normal state and transparent state respectively. + constexpr float kB[2][2] = { + {1.f - kConvergedNormal, kConvergedNormal}, + {1.f - kConvergedTransparent, kConvergedTransparent}}; + + // Probability of the two states before the update. + const float prob_transparent = prob_transparent_state_; + const float prob_normal = 1.f - prob_transparent; + + // Probability of transitioning to transparent state. + const float prob_transition_transparent = + prob_normal * kA[0] + prob_transparent * kA[1]; + const float prob_transition_normal = 1.f - prob_transition_transparent; + + // Observed output. + const int out = static_cast(any_coarse_filter_converged); + + // Joint probabilites of the observed output and respective states. + const float prob_joint_normal = prob_transition_normal * kB[0][out]; + const float prob_joint_transparent = + prob_transition_transparent * kB[1][out]; + + // Conditional probability of transparent state and the observed output. + RTC_DCHECK_GT(prob_joint_normal + prob_joint_transparent, 0.f); + prob_transparent_state_ = + prob_joint_transparent / (prob_joint_normal + prob_joint_transparent); + + // Transparent mode is only activated when its state probability is high. + // Dead zone between activation/deactivation thresholds to avoid switching + // back and forth. + if (prob_transparent_state_ > 0.95f) { + transparency_activated_ = true; + } else if (prob_transparent_state_ < 0.5f) { + transparency_activated_ = false; + } + } + + private: + bool transparency_activated_ = false; + float prob_transparent_state_ = 0.f; +}; + +// Legacy classifier for toggling transparent mode. +class LegacyTransparentModeImpl : public TransparentMode { + public: + explicit LegacyTransparentModeImpl(const EchoCanceller3Config& config) + : linear_and_stable_echo_path_( + config.echo_removal_control.linear_and_stable_echo_path), + active_blocks_since_sane_filter_(kBlocksSinceConsistentEstimateInit), + non_converged_sequence_size_(kBlocksSinceConvergencedFilterInit) {} + + bool Active() const override { return transparency_activated_; } + + void Reset() override { + non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; + diverged_sequence_size_ = 0; + strong_not_saturated_render_blocks_ = 0; + if (linear_and_stable_echo_path_) { + recent_convergence_during_activity_ = false; + } + } + + void Update(int filter_delay_blocks, + bool any_filter_consistent, + bool any_filter_converged, + bool any_coarse_filter_converged, + bool all_filters_diverged, + bool active_render, + bool saturated_capture) override { + ++capture_block_counter_; + strong_not_saturated_render_blocks_ += + active_render && !saturated_capture ? 
1 : 0; + + if (any_filter_consistent && filter_delay_blocks < 5) { + sane_filter_observed_ = true; + active_blocks_since_sane_filter_ = 0; + } else if (active_render) { + ++active_blocks_since_sane_filter_; + } + + bool sane_filter_recently_seen; + if (!sane_filter_observed_) { + sane_filter_recently_seen = + capture_block_counter_ <= 5 * kNumBlocksPerSecond; + } else { + sane_filter_recently_seen = + active_blocks_since_sane_filter_ <= 30 * kNumBlocksPerSecond; + } + + if (any_filter_converged) { + recent_convergence_during_activity_ = true; + active_non_converged_sequence_size_ = 0; + non_converged_sequence_size_ = 0; + ++num_converged_blocks_; + } else { + if (++non_converged_sequence_size_ > 20 * kNumBlocksPerSecond) { + num_converged_blocks_ = 0; + } + + if (active_render && + ++active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { + recent_convergence_during_activity_ = false; + } + } + + if (!all_filters_diverged) { + diverged_sequence_size_ = 0; + } else if (++diverged_sequence_size_ >= 60) { + // TODO(peah): Change these lines to ensure proper triggering of usable + // filter. + non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; + } + + if (active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { + finite_erl_recently_detected_ = false; + } + if (num_converged_blocks_ > 50) { + finite_erl_recently_detected_ = true; + } + + if (finite_erl_recently_detected_) { + transparency_activated_ = false; + } else if (sane_filter_recently_seen && + recent_convergence_during_activity_) { + transparency_activated_ = false; + } else { + const bool filter_should_have_converged = + strong_not_saturated_render_blocks_ > 6 * kNumBlocksPerSecond; + transparency_activated_ = filter_should_have_converged; + } + } + + private: + const bool linear_and_stable_echo_path_; + size_t capture_block_counter_ = 0; + bool transparency_activated_ = false; + size_t active_blocks_since_sane_filter_; + bool sane_filter_observed_ = false; + bool finite_erl_recently_detected_ = false; + size_t non_converged_sequence_size_; + size_t diverged_sequence_size_ = 0; + size_t active_non_converged_sequence_size_ = 0; + size_t num_converged_blocks_ = 0; + bool recent_convergence_during_activity_ = false; + size_t strong_not_saturated_render_blocks_ = 0; +}; + +std::unique_ptr TransparentMode::Create( + const EchoCanceller3Config& config) { + if (config.ep_strength.bounded_erl || DeactivateTransparentMode()) { + RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Disabled"; + return nullptr; + } + if (ActivateTransparentModeHmm()) { + RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: HMM"; + return std::make_unique(); + } + RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Legacy"; + return std::make_unique(config); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.h b/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.h new file mode 100644 index 0000000000..bc5dd0391b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/transparent_mode.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_TRANSPARENT_MODE_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_TRANSPARENT_MODE_H_ + +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Class for detecting and toggling the transparent mode which causes the +// suppressor to apply less suppression. +class TransparentMode { + public: + static std::unique_ptr Create( + const EchoCanceller3Config& config); + + virtual ~TransparentMode() {} + + // Returns whether the transparent mode should be active. + virtual bool Active() const = 0; + + // Resets the state of the detector. + virtual void Reset() = 0; + + // Updates the detection decision based on new data. + virtual void Update(int filter_delay_blocks, + bool any_filter_consistent, + bool any_filter_converged, + bool any_coarse_filter_converged, + bool all_filters_diverged, + bool active_render, + bool saturated_capture) = 0; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_AEC3_TRANSPARENT_MODE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/vector_math.h b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math.h new file mode 100644 index 0000000000..e4d1381ae1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math.h @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_ + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_HAS_NEON) +#include +#endif +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace aec3 { + +// Provides optimizations for mathematical operations based on vectors. +class VectorMath { + public: + explicit VectorMath(Aec3Optimization optimization) + : optimization_(optimization) {} + + // Elementwise square root. + void SqrtAVX2(rtc::ArrayView x); + void Sqrt(rtc::ArrayView x) { + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + __m128 g = _mm_loadu_ps(&x[j]); + g = _mm_sqrt_ps(g); + _mm_storeu_ps(&x[j], g); + } + + for (; j < x_size; ++j) { + x[j] = sqrtf(x[j]); + } + } break; + case Aec3Optimization::kAvx2: + SqrtAVX2(x); + break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + float32x4_t g = vld1q_f32(&x[j]); +#if !defined(WEBRTC_ARCH_ARM64) + float32x4_t y = vrsqrteq_f32(g); + + // Code to handle sqrt(0). + // If the input to sqrtf() is zero, a zero will be returned. + // If the input to vrsqrteq_f32() is zero, positive infinity is + // returned. 
+ const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000); + // check for divide by zero + const uint32x4_t div_by_zero = + vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(y)); + // zero out the positive infinity results + y = vreinterpretq_f32_u32( + vandq_u32(vmvnq_u32(div_by_zero), vreinterpretq_u32_f32(y))); + // from arm documentation + // The Newton-Raphson iteration: + // y[n+1] = y[n] * (3 - d * (y[n] * y[n])) / 2) + // converges to (1/√d) if y0 is the result of VRSQRTE applied to d. + // + // Note: The precision did not improve after 2 iterations. + for (int i = 0; i < 2; i++) { + y = vmulq_f32(vrsqrtsq_f32(vmulq_f32(y, y), g), y); + } + // sqrt(g) = g * 1/sqrt(g) + g = vmulq_f32(g, y); +#else + g = vsqrtq_f32(g); +#endif + vst1q_f32(&x[j], g); + } + + for (; j < x_size; ++j) { + x[j] = sqrtf(x[j]); + } + } +#endif + break; + default: + std::for_each(x.begin(), x.end(), [](float& a) { a = sqrtf(a); }); + } + } + + // Elementwise vector multiplication z = x * y. + void MultiplyAVX2(rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView z); + void Multiply(rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView z) { + RTC_DCHECK_EQ(z.size(), x.size()); + RTC_DCHECK_EQ(z.size(), y.size()); + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const __m128 x_j = _mm_loadu_ps(&x[j]); + const __m128 y_j = _mm_loadu_ps(&y[j]); + const __m128 z_j = _mm_mul_ps(x_j, y_j); + _mm_storeu_ps(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] = x[j] * y[j]; + } + } break; + case Aec3Optimization::kAvx2: + MultiplyAVX2(x, y, z); + break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const float32x4_t x_j = vld1q_f32(&x[j]); + const float32x4_t y_j = vld1q_f32(&y[j]); + const float32x4_t z_j = vmulq_f32(x_j, y_j); + vst1q_f32(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] = x[j] * y[j]; + } + } break; +#endif + default: + std::transform(x.begin(), x.end(), y.begin(), z.begin(), + std::multiplies()); + } + } + + // Elementwise vector accumulation z += x. 
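On 32-bit ARM, the Sqrt path above seeds with the vrsqrteq_f32 estimate and applies the Newton-Raphson iteration y' = y * (3 - d * y * y) / 2 twice, then multiplies by d to turn 1/sqrt(d) into sqrt(d). A scalar sketch of the same refinement, with an arbitrary rough seed in place of the hardware estimate:

#include <cmath>
#include <cstdio>

int main() {
  // Newton-Raphson refinement of a reciprocal square root estimate:
  // y' = y * (3 - d * y * y) / 2 converges quadratically to 1/sqrt(d).
  const float d = 42.f;
  float y = 0.15f;  // Rough seed, standing in for the vrsqrteq_f32 output.
  for (int i = 0; i < 2; ++i) {
    y = y * (3.f - d * y * y) * 0.5f;
    std::printf("iteration %d: d * y = %.6f\n", i + 1, d * y);
  }
  std::printf("sqrtf(d)      = %.6f\n", std::sqrt(d));
  return 0;
}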
+ void AccumulateAVX2(rtc::ArrayView x, rtc::ArrayView z); + void Accumulate(rtc::ArrayView x, rtc::ArrayView z) { + RTC_DCHECK_EQ(z.size(), x.size()); + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const __m128 x_j = _mm_loadu_ps(&x[j]); + __m128 z_j = _mm_loadu_ps(&z[j]); + z_j = _mm_add_ps(x_j, z_j); + _mm_storeu_ps(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] += x[j]; + } + } break; + case Aec3Optimization::kAvx2: + AccumulateAVX2(x, z); + break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const float32x4_t x_j = vld1q_f32(&x[j]); + float32x4_t z_j = vld1q_f32(&z[j]); + z_j = vaddq_f32(z_j, x_j); + vst1q_f32(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] += x[j]; + } + } break; +#endif + default: + std::transform(x.begin(), x.end(), z.begin(), z.begin(), + std::plus()); + } + } + + private: + Aec3Optimization optimization_; +}; + +} // namespace aec3 + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc new file mode 100644 index 0000000000..0b5f3c142e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_avx2.cc @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/vector_math.h" + +#include +#include + +#include "api/array_view.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace aec3 { + +// Elementwise square root. +void VectorMath::SqrtAVX2(rtc::ArrayView x) { + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 3; + + int j = 0; + for (; j < vector_limit * 8; j += 8) { + __m256 g = _mm256_loadu_ps(&x[j]); + g = _mm256_sqrt_ps(g); + _mm256_storeu_ps(&x[j], g); + } + + for (; j < x_size; ++j) { + x[j] = sqrtf(x[j]); + } +} + +// Elementwise vector multiplication z = x * y. +void VectorMath::MultiplyAVX2(rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView z) { + RTC_DCHECK_EQ(z.size(), x.size()); + RTC_DCHECK_EQ(z.size(), y.size()); + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 3; + + int j = 0; + for (; j < vector_limit * 8; j += 8) { + const __m256 x_j = _mm256_loadu_ps(&x[j]); + const __m256 y_j = _mm256_loadu_ps(&y[j]); + const __m256 z_j = _mm256_mul_ps(x_j, y_j); + _mm256_storeu_ps(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] = x[j] * y[j]; + } +} + +// Elementwise vector accumulation z += x. 
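Because the SIMD selection happens inside these methods, client code is identical regardless of the instruction set. A minimal usage sketch of the class (assuming a WebRTC build environment for the two includes):

#include <array>
#include <cstdio>

#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/vector_math.h"

int main() {
  std::array<float, 128> x;
  std::array<float, 128> z;
  x.fill(2.f);
  z.fill(1.f);
  webrtc::aec3::VectorMath math(webrtc::Aec3Optimization::kNone);
  math.Accumulate(x, z);  // z[i] += x[i], giving 3.f.
  math.Sqrt(z);           // z[i] = sqrt(z[i]), giving ~1.732f.
  std::printf("z[0] = %f\n", z[0]);
  return 0;
}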
+void VectorMath::AccumulateAVX2(rtc::ArrayView x, + rtc::ArrayView z) { + RTC_DCHECK_EQ(z.size(), x.size()); + const int x_size = static_cast(x.size()); + const int vector_limit = x_size >> 3; + + int j = 0; + for (; j < vector_limit * 8; j += 8) { + const __m256 x_j = _mm256_loadu_ps(&x[j]); + __m256 z_j = _mm256_loadu_ps(&z[j]); + z_j = _mm256_add_ps(x_j, z_j); + _mm256_storeu_ps(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] += x[j]; + } +} + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_gn/moz.build new file mode 100644 index 0000000000..89ee0b6a81 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + 
DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("vector_math_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_unittest.cc new file mode 100644 index 0000000000..a9c37e33cf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/vector_math_unittest.cc @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/vector_math.h"
+
+#include <math.h>
+
+#include "rtc_base/system/arch.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+#if defined(WEBRTC_HAS_NEON)
+
+TEST(VectorMath, Sqrt) {
+  std::array<float, kFftLengthBy2Plus1> x;
+  std::array<float, kFftLengthBy2Plus1> z;
+  std::array<float, kFftLengthBy2Plus1> z_neon;
+
+  for (size_t k = 0; k < x.size(); ++k) {
+    x[k] = (2.f / 3.f) * k;
+  }
+
+  std::copy(x.begin(), x.end(), z.begin());
+  aec3::VectorMath(Aec3Optimization::kNone).Sqrt(z);
+  std::copy(x.begin(), x.end(), z_neon.begin());
+  aec3::VectorMath(Aec3Optimization::kNeon).Sqrt(z_neon);
+  for (size_t k = 0; k < z.size(); ++k) {
+    EXPECT_NEAR(z[k], z_neon[k], 0.0001f);
+    EXPECT_NEAR(sqrtf(x[k]), z_neon[k], 0.0001f);
+  }
+}
+
+TEST(VectorMath, Multiply) {
+  std::array<float, kFftLengthBy2Plus1> x;
+  std::array<float, kFftLengthBy2Plus1> y;
+  std::array<float, kFftLengthBy2Plus1> z;
+  std::array<float, kFftLengthBy2Plus1> z_neon;
+
+  for (size_t k = 0; k < x.size(); ++k) {
+    x[k] = k;
+    y[k] = (2.f / 3.f) * k;
+  }
+
+  aec3::VectorMath(Aec3Optimization::kNone).Multiply(x, y, z);
+  aec3::VectorMath(Aec3Optimization::kNeon).Multiply(x, y, z_neon);
+  for (size_t k = 0; k < z.size(); ++k) {
+    EXPECT_FLOAT_EQ(z[k], z_neon[k]);
+    EXPECT_FLOAT_EQ(x[k] * y[k], z_neon[k]);
+  }
+}
+
+TEST(VectorMath, Accumulate) {
+  std::array<float, kFftLengthBy2Plus1> x;
+  std::array<float, kFftLengthBy2Plus1> z;
+  std::array<float, kFftLengthBy2Plus1> z_neon;
+
+  for (size_t k = 0; k < x.size(); ++k) {
+    x[k] = k;
+    z[k] = z_neon[k] = 2.f * k;
+  }
+
+  aec3::VectorMath(Aec3Optimization::kNone).Accumulate(x, z);
+  aec3::VectorMath(Aec3Optimization::kNeon).Accumulate(x, z_neon);
+  for (size_t k = 0; k < z.size(); ++k) {
+    EXPECT_FLOAT_EQ(z[k], z_neon[k]);
+    EXPECT_FLOAT_EQ(x[k] + 2.f * x[k], z_neon[k]);
+  }
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+
+TEST(VectorMath, Sse2Sqrt) {
+  if (GetCPUInfo(kSSE2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_sse2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = (2.f / 3.f) * k;
+    }
+
+    std::copy(x.begin(), x.end(), z.begin());
+    aec3::VectorMath(Aec3Optimization::kNone).Sqrt(z);
+    std::copy(x.begin(), x.end(), z_sse2.begin());
+    aec3::VectorMath(Aec3Optimization::kSse2).Sqrt(z_sse2);
+    EXPECT_EQ(z, z_sse2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_sse2[k]);
+      EXPECT_FLOAT_EQ(sqrtf(x[k]), z_sse2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Avx2Sqrt) {
+  if (GetCPUInfo(kAVX2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_avx2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = (2.f / 3.f) * k;
+    }
+
+    std::copy(x.begin(), x.end(), z.begin());
+    aec3::VectorMath(Aec3Optimization::kNone).Sqrt(z);
+    std::copy(x.begin(), x.end(), z_avx2.begin());
+    aec3::VectorMath(Aec3Optimization::kAvx2).Sqrt(z_avx2);
+    EXPECT_EQ(z, z_avx2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_avx2[k]);
+      EXPECT_FLOAT_EQ(sqrtf(x[k]), z_avx2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Sse2Multiply) {
+  if (GetCPUInfo(kSSE2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> y;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_sse2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = k;
+      y[k] = (2.f / 3.f) * k;
+    }
+
+    aec3::VectorMath(Aec3Optimization::kNone).Multiply(x, y, z);
+    aec3::VectorMath(Aec3Optimization::kSse2).Multiply(x, y, z_sse2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_sse2[k]);
+      EXPECT_FLOAT_EQ(x[k] * y[k], z_sse2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Avx2Multiply) {
+  if (GetCPUInfo(kAVX2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> y;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_avx2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = k;
+      y[k] = (2.f / 3.f) * k;
+    }
+
+    aec3::VectorMath(Aec3Optimization::kNone).Multiply(x, y, z);
+    aec3::VectorMath(Aec3Optimization::kAvx2).Multiply(x, y, z_avx2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_avx2[k]);
+      EXPECT_FLOAT_EQ(x[k] * y[k], z_avx2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Sse2Accumulate) {
+  if (GetCPUInfo(kSSE2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_sse2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = k;
+      z[k] = z_sse2[k] = 2.f * k;
+    }
+
+    aec3::VectorMath(Aec3Optimization::kNone).Accumulate(x, z);
+    aec3::VectorMath(Aec3Optimization::kSse2).Accumulate(x, z_sse2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_sse2[k]);
+      EXPECT_FLOAT_EQ(x[k] + 2.f * x[k], z_sse2[k]);
+    }
+  }
+}
+
+TEST(VectorMath, Avx2Accumulate) {
+  if (GetCPUInfo(kAVX2) != 0) {
+    std::array<float, kFftLengthBy2Plus1> x;
+    std::array<float, kFftLengthBy2Plus1> z;
+    std::array<float, kFftLengthBy2Plus1> z_avx2;
+
+    for (size_t k = 0; k < x.size(); ++k) {
+      x[k] = k;
+      z[k] = z_avx2[k] = 2.f * k;
+    }
+
+    aec3::VectorMath(Aec3Optimization::kNone).Accumulate(x, z);
+    aec3::VectorMath(Aec3Optimization::kAvx2).Accumulate(x, z_avx2);
+    for (size_t k = 0; k < z.size(); ++k) {
+      EXPECT_FLOAT_EQ(z[k], z_avx2[k]);
+      EXPECT_FLOAT_EQ(x[k] + 2.f * x[k], z_avx2[k]);
+    }
+  }
+}
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/aec_dump/BUILD.gn
new file mode 100644
index 0000000000..38d8776258
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/BUILD.gn
@@ -0,0 +1,112 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+ +import("../../../webrtc.gni") # This contains def of 'rtc_enable_protobuf' + +rtc_source_set("aec_dump") { + visibility = [ "*" ] + sources = [ "aec_dump_factory.h" ] + + deps = [ + "..:aec_dump_interface", + "../../../rtc_base/system:file_wrapper", + "../../../rtc_base/system:rtc_export", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +if (rtc_include_tests) { + rtc_library("mock_aec_dump") { + testonly = true + sources = [ + "mock_aec_dump.cc", + "mock_aec_dump.h", + ] + + deps = [ + "..:aec_dump_interface", + "..:audioproc_test_utils", + "../", + "../../../test:test_support", + ] + } + + rtc_library("mock_aec_dump_unittests") { + testonly = true + configs += [ "..:apm_debug_dump" ] + sources = [ "aec_dump_integration_test.cc" ] + + deps = [ + ":mock_aec_dump", + "..:api", + "..:audioproc_test_utils", + "../", + "//testing/gtest", + ] + } +} + +if (rtc_enable_protobuf) { + rtc_library("aec_dump_impl") { + sources = [ + "aec_dump_impl.cc", + "aec_dump_impl.h", + "capture_stream_info.cc", + "capture_stream_info.h", + ] + + deps = [ + ":aec_dump", + "..:aec_dump_interface", + "../../../api/audio:audio_frame_api", + "../../../api/task_queue", + "../../../rtc_base:checks", + "../../../rtc_base:ignore_wundef", + "../../../rtc_base:logging", + "../../../rtc_base:macromagic", + "../../../rtc_base:protobuf_utils", + "../../../rtc_base:race_checker", + "../../../rtc_base:rtc_event", + "../../../rtc_base:rtc_task_queue", + "../../../rtc_base/system:file_wrapper", + "../../../system_wrappers", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + + deps += [ "../:audioproc_debug_proto" ] + } + + if (rtc_include_tests) { + rtc_library("aec_dump_unittests") { + testonly = true + defines = [] + deps = [ + ":aec_dump", + ":aec_dump_impl", + "..:audioproc_debug_proto", + "../", + "../../../rtc_base:task_queue_for_test", + "../../../test:fileutils", + "../../../test:test_support", + "//testing/gtest", + ] + sources = [ "aec_dump_unittest.cc" ] + } + } +} + +rtc_library("null_aec_dump_factory") { + assert_no_deps = [ ":aec_dump_impl" ] + sources = [ "null_aec_dump_factory.cc" ] + + deps = [ + ":aec_dump", + "..:aec_dump_interface", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_factory.h b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_factory.h new file mode 100644 index 0000000000..20718c3d7f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_factory.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_ + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/include/aec_dump.h" +#include "rtc_base/system/file_wrapper.h" +#include "rtc_base/system/rtc_export.h" + +namespace rtc { +class TaskQueue; +} // namespace rtc + +namespace webrtc { + +class RTC_EXPORT AecDumpFactory { + public: + // The `worker_queue` may not be null and must outlive the created + // AecDump instance. 
`max_log_size_bytes == -1` means the log size
+  // will be unlimited. `handle` may not be null. The AecDump takes
+  // responsibility for `handle` and closes it in the destructor. A
+  // non-null return value indicates that the file has been
+  // successfully opened.
+  static std::unique_ptr<AecDump> Create(webrtc::FileWrapper file,
+                                         int64_t max_log_size_bytes,
+                                         rtc::TaskQueue* worker_queue);
+  static std::unique_ptr<AecDump> Create(absl::string_view file_name,
+                                         int64_t max_log_size_bytes,
+                                         rtc::TaskQueue* worker_queue);
+  static std::unique_ptr<AecDump> Create(FILE* handle,
+                                         int64_t max_log_size_bytes,
+                                         rtc::TaskQueue* worker_queue);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build
new file mode 100644
index 0000000000..201cd58360
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build
@@ -0,0 +1,209 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"]
= "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec_dump_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc new file mode 100644 index 0000000000..94c24048e0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec_dump/aec_dump_impl.h" + +#include +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "rtc_base/checks.h" +#include "rtc_base/event.h" +#include "rtc_base/task_queue.h" + +namespace webrtc { + +namespace { +void CopyFromConfigToEvent(const webrtc::InternalAPMConfig& config, + webrtc::audioproc::Config* pb_cfg) { + pb_cfg->set_aec_enabled(config.aec_enabled); + pb_cfg->set_aec_delay_agnostic_enabled(config.aec_delay_agnostic_enabled); + pb_cfg->set_aec_drift_compensation_enabled( + config.aec_drift_compensation_enabled); + pb_cfg->set_aec_extended_filter_enabled(config.aec_extended_filter_enabled); + pb_cfg->set_aec_suppression_level(config.aec_suppression_level); + + pb_cfg->set_aecm_enabled(config.aecm_enabled); + pb_cfg->set_aecm_comfort_noise_enabled(config.aecm_comfort_noise_enabled); + pb_cfg->set_aecm_routing_mode(config.aecm_routing_mode); + + pb_cfg->set_agc_enabled(config.agc_enabled); + pb_cfg->set_agc_mode(config.agc_mode); + pb_cfg->set_agc_limiter_enabled(config.agc_limiter_enabled); + pb_cfg->set_noise_robust_agc_enabled(config.noise_robust_agc_enabled); + + pb_cfg->set_hpf_enabled(config.hpf_enabled); + + pb_cfg->set_ns_enabled(config.ns_enabled); + pb_cfg->set_ns_level(config.ns_level); + + pb_cfg->set_transient_suppression_enabled( + config.transient_suppression_enabled); + + pb_cfg->set_pre_amplifier_enabled(config.pre_amplifier_enabled); + pb_cfg->set_pre_amplifier_fixed_gain_factor( + config.pre_amplifier_fixed_gain_factor); + + pb_cfg->set_experiments_description(config.experiments_description); +} + +} // namespace + +AecDumpImpl::AecDumpImpl(FileWrapper debug_file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) + : debug_file_(std::move(debug_file)), + num_bytes_left_for_log_(max_log_size_bytes), + worker_queue_(worker_queue) {} + +AecDumpImpl::~AecDumpImpl() { + // Block until all tasks have finished running. + rtc::Event thread_sync_event; + worker_queue_->PostTask([&thread_sync_event] { thread_sync_event.Set(); }); + // Wait until the event has been signaled with .Set(). By then all + // pending tasks will have finished. 
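+  // The task queue runs posted tasks strictly in FIFO order, so the marker
+  // task above can only fire after every previously posted write task has
+  // completed; waiting on it here is what makes closing `debug_file_` safe.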
+ thread_sync_event.Wait(rtc::Event::kForever); +} + +void AecDumpImpl::WriteInitMessage(const ProcessingConfig& api_format, + int64_t time_now_ms) { + auto event = std::make_unique(); + event->set_type(audioproc::Event::INIT); + audioproc::Init* msg = event->mutable_init(); + + msg->set_sample_rate(api_format.input_stream().sample_rate_hz()); + msg->set_output_sample_rate(api_format.output_stream().sample_rate_hz()); + msg->set_reverse_sample_rate( + api_format.reverse_input_stream().sample_rate_hz()); + msg->set_reverse_output_sample_rate( + api_format.reverse_output_stream().sample_rate_hz()); + + msg->set_num_input_channels( + static_cast(api_format.input_stream().num_channels())); + msg->set_num_output_channels( + static_cast(api_format.output_stream().num_channels())); + msg->set_num_reverse_channels( + static_cast(api_format.reverse_input_stream().num_channels())); + msg->set_num_reverse_output_channels( + api_format.reverse_output_stream().num_channels()); + msg->set_timestamp_ms(time_now_ms); + + PostWriteToFileTask(std::move(event)); +} + +void AecDumpImpl::AddCaptureStreamInput( + const AudioFrameView& src) { + capture_stream_info_.AddInput(src); +} + +void AecDumpImpl::AddCaptureStreamOutput( + const AudioFrameView& src) { + capture_stream_info_.AddOutput(src); +} + +void AecDumpImpl::AddCaptureStreamInput(const int16_t* const data, + int num_channels, + int samples_per_channel) { + capture_stream_info_.AddInput(data, num_channels, samples_per_channel); +} + +void AecDumpImpl::AddCaptureStreamOutput(const int16_t* const data, + int num_channels, + int samples_per_channel) { + capture_stream_info_.AddOutput(data, num_channels, samples_per_channel); +} + +void AecDumpImpl::AddAudioProcessingState(const AudioProcessingState& state) { + capture_stream_info_.AddAudioProcessingState(state); +} + +void AecDumpImpl::WriteCaptureStreamMessage() { + PostWriteToFileTask(capture_stream_info_.FetchEvent()); +} + +void AecDumpImpl::WriteRenderStreamMessage(const int16_t* const data, + int num_channels, + int samples_per_channel) { + auto event = std::make_unique(); + event->set_type(audioproc::Event::REVERSE_STREAM); + audioproc::ReverseStream* msg = event->mutable_reverse_stream(); + const size_t data_size = sizeof(int16_t) * samples_per_channel * num_channels; + msg->set_data(data, data_size); + + PostWriteToFileTask(std::move(event)); +} + +void AecDumpImpl::WriteRenderStreamMessage( + const AudioFrameView& src) { + auto event = std::make_unique(); + event->set_type(audioproc::Event::REVERSE_STREAM); + + audioproc::ReverseStream* msg = event->mutable_reverse_stream(); + + for (int i = 0; i < src.num_channels(); ++i) { + const auto& channel_view = src.channel(i); + msg->add_channel(channel_view.begin(), sizeof(float) * channel_view.size()); + } + + PostWriteToFileTask(std::move(event)); +} + +void AecDumpImpl::WriteConfig(const InternalAPMConfig& config) { + RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); + auto event = std::make_unique(); + event->set_type(audioproc::Event::CONFIG); + CopyFromConfigToEvent(config, event->mutable_config()); + PostWriteToFileTask(std::move(event)); +} + +void AecDumpImpl::WriteRuntimeSetting( + const AudioProcessing::RuntimeSetting& runtime_setting) { + RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); + auto event = std::make_unique(); + event->set_type(audioproc::Event::RUNTIME_SETTING); + audioproc::RuntimeSetting* setting = event->mutable_runtime_setting(); + switch (runtime_setting.type()) { + case AudioProcessing::RuntimeSetting::Type::kCapturePreGain: { + 
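+      // Each case unpacks the payload with the getter that matches the
+      // setting's type (GetFloat, GetBool, GetInt, ...) and mirrors it into
+      // the matching field of the audioproc::RuntimeSetting proto.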
float x; + runtime_setting.GetFloat(&x); + setting->set_capture_pre_gain(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kCapturePostGain: { + float x; + runtime_setting.GetFloat(&x); + setting->set_capture_post_gain(x); + break; + } + case AudioProcessing::RuntimeSetting::Type:: + kCustomRenderProcessingRuntimeSetting: { + float x; + runtime_setting.GetFloat(&x); + setting->set_custom_render_processing_setting(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kCaptureCompressionGain: + // Runtime AGC1 compression gain is ignored. + // TODO(http://bugs.webrtc.org/10432): Store compression gain in aecdumps. + break; + case AudioProcessing::RuntimeSetting::Type::kCaptureFixedPostGain: { + float x; + runtime_setting.GetFloat(&x); + setting->set_capture_fixed_post_gain(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kCaptureOutputUsed: { + bool x; + runtime_setting.GetBool(&x); + setting->set_capture_output_used(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kPlayoutVolumeChange: { + int x; + runtime_setting.GetInt(&x); + setting->set_playout_volume_change(x); + break; + } + case AudioProcessing::RuntimeSetting::Type::kPlayoutAudioDeviceChange: { + AudioProcessing::RuntimeSetting::PlayoutAudioDeviceInfo src; + runtime_setting.GetPlayoutAudioDeviceInfo(&src); + auto* dst = setting->mutable_playout_audio_device_change(); + dst->set_id(src.id); + dst->set_max_volume(src.max_volume); + break; + } + case AudioProcessing::RuntimeSetting::Type::kNotSpecified: + RTC_DCHECK_NOTREACHED(); + break; + } + PostWriteToFileTask(std::move(event)); +} + +void AecDumpImpl::PostWriteToFileTask(std::unique_ptr event) { + RTC_DCHECK(event); + worker_queue_->PostTask([event = std::move(event), this] { + std::string event_string = event->SerializeAsString(); + const size_t event_byte_size = event_string.size(); + + if (num_bytes_left_for_log_ >= 0) { + const int64_t next_message_size = sizeof(int32_t) + event_byte_size; + if (num_bytes_left_for_log_ < next_message_size) { + // Ensure that no further events are written, even if they're smaller + // than the current event. + num_bytes_left_for_log_ = 0; + return; + } + num_bytes_left_for_log_ -= next_message_size; + } + + // Write message preceded by its size. 
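+    // Record framing: a 32-bit byte count, then the serialized
+    // audioproc::Event. A reader recovers the stream by repeatedly reading
+    // four size bytes followed by that many payload bytes. Note that only
+    // the first sizeof(int32_t) bytes of the size_t counter are written,
+    // which assumes a little-endian host.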
+ if (!debug_file_.Write(&event_byte_size, sizeof(int32_t))) { + RTC_DCHECK_NOTREACHED(); + } + if (!debug_file_.Write(event_string.data(), event_string.size())) { + RTC_DCHECK_NOTREACHED(); + } + }); +} + +std::unique_ptr AecDumpFactory::Create(webrtc::FileWrapper file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + RTC_DCHECK(worker_queue); + if (!file.is_open()) + return nullptr; + + return std::make_unique(std::move(file), max_log_size_bytes, + worker_queue); +} + +std::unique_ptr AecDumpFactory::Create(absl::string_view file_name, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return Create(FileWrapper::OpenWriteOnly(file_name), max_log_size_bytes, + worker_queue); +} + +std::unique_ptr AecDumpFactory::Create(FILE* handle, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return Create(FileWrapper(handle), max_log_size_bytes, worker_queue); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.h b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.h new file mode 100644 index 0000000000..fac3712b7a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_impl.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_IMPL_H_ + +#include +#include +#include + +#include "modules/audio_processing/aec_dump/capture_stream_info.h" +#include "modules/audio_processing/include/aec_dump.h" +#include "rtc_base/ignore_wundef.h" +#include "rtc_base/race_checker.h" +#include "rtc_base/system/file_wrapper.h" +#include "rtc_base/task_queue.h" +#include "rtc_base/thread_annotations.h" + +// Files generated at build-time by the protobuf compiler. +RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" +#else +#include "modules/audio_processing/debug.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { + +// Task-queue based implementation of AecDump. It is thread safe by +// relying on locks in TaskQueue. +class AecDumpImpl : public AecDump { + public: + // `max_log_size_bytes` - maximum number of bytes to write to the debug file, + // `max_log_size_bytes == -1` means the log size will be unlimited. 
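+  // Illustrative usage only (the file name and queue below are arbitrary
+  // examples): instances are normally obtained via AecDumpFactory, e.g.
+  //   rtc::TaskQueue* queue = ...;  // must outlive the dump
+  //   auto dump = AecDumpFactory::Create(
+  //       FileWrapper::OpenWriteOnly("debug.aecdump"), -1, queue);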
+ AecDumpImpl(FileWrapper debug_file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue); + AecDumpImpl(const AecDumpImpl&) = delete; + AecDumpImpl& operator=(const AecDumpImpl&) = delete; + ~AecDumpImpl() override; + + void WriteInitMessage(const ProcessingConfig& api_format, + int64_t time_now_ms) override; + void AddCaptureStreamInput(const AudioFrameView& src) override; + void AddCaptureStreamOutput(const AudioFrameView& src) override; + void AddCaptureStreamInput(const int16_t* const data, + int num_channels, + int samples_per_channel) override; + void AddCaptureStreamOutput(const int16_t* const data, + int num_channels, + int samples_per_channel) override; + void AddAudioProcessingState(const AudioProcessingState& state) override; + void WriteCaptureStreamMessage() override; + + void WriteRenderStreamMessage(const int16_t* const data, + int num_channels, + int samples_per_channel) override; + void WriteRenderStreamMessage( + const AudioFrameView& src) override; + + void WriteConfig(const InternalAPMConfig& config) override; + + void WriteRuntimeSetting( + const AudioProcessing::RuntimeSetting& runtime_setting) override; + + private: + void PostWriteToFileTask(std::unique_ptr event); + + FileWrapper debug_file_; + int64_t num_bytes_left_for_log_ = 0; + rtc::RaceChecker race_checker_; + rtc::TaskQueue* worker_queue_; + CaptureStreamInfo capture_stream_info_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc new file mode 100644 index 0000000000..503135d87f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include +#include + +#include "modules/audio_processing/aec_dump/mock_aec_dump.h" +#include "modules/audio_processing/audio_processing_impl.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/audio_processing_builder_for_testing.h" + +using ::testing::_; +using ::testing::AtLeast; +using ::testing::Exactly; +using ::testing::StrictMock; + +namespace { +rtc::scoped_refptr CreateAudioProcessing() { + rtc::scoped_refptr apm( + webrtc::AudioProcessingBuilderForTesting().Create()); + RTC_DCHECK(apm); + return apm; +} + +std::unique_ptr CreateMockAecDump() { + auto mock_aec_dump = + std::make_unique>(); + EXPECT_CALL(*mock_aec_dump.get(), WriteConfig(_)).Times(AtLeast(1)); + EXPECT_CALL(*mock_aec_dump.get(), WriteInitMessage(_, _)).Times(AtLeast(1)); + return std::unique_ptr(std::move(mock_aec_dump)); +} + +} // namespace + +TEST(AecDumpIntegration, ConfigurationAndInitShouldBeLogged) { + auto apm = CreateAudioProcessing(); + + apm->AttachAecDump(CreateMockAecDump()); +} + +TEST(AecDumpIntegration, + RenderStreamShouldBeLoggedOnceEveryProcessReverseStream) { + auto apm = CreateAudioProcessing(); + auto mock_aec_dump = CreateMockAecDump(); + constexpr int kNumChannels = 1; + constexpr int kNumSampleRateHz = 16000; + constexpr int kNumSamplesPerChannel = kNumSampleRateHz / 100; + std::array frame; + frame.fill(0.f); + webrtc::StreamConfig stream_config(kNumSampleRateHz, kNumChannels); + + EXPECT_CALL(*mock_aec_dump.get(), WriteRenderStreamMessage(_, _, _)) + .Times(Exactly(1)); + + apm->AttachAecDump(std::move(mock_aec_dump)); + apm->ProcessReverseStream(frame.data(), stream_config, stream_config, + frame.data()); +} + +TEST(AecDumpIntegration, CaptureStreamShouldBeLoggedOnceEveryProcessStream) { + auto apm = CreateAudioProcessing(); + auto mock_aec_dump = CreateMockAecDump(); + constexpr int kNumChannels = 1; + constexpr int kNumSampleRateHz = 16000; + constexpr int kNumSamplesPerChannel = kNumSampleRateHz / 100; + std::array frame; + frame.fill(0.f); + + webrtc::StreamConfig stream_config(kNumSampleRateHz, kNumChannels); + + EXPECT_CALL(*mock_aec_dump.get(), AddCaptureStreamInput(_, _, _)) + .Times(AtLeast(1)); + + EXPECT_CALL(*mock_aec_dump.get(), AddCaptureStreamOutput(_, _, _)) + .Times(Exactly(1)); + + EXPECT_CALL(*mock_aec_dump.get(), AddAudioProcessingState(_)) + .Times(Exactly(1)); + + EXPECT_CALL(*mock_aec_dump.get(), WriteCaptureStreamMessage()) + .Times(Exactly(1)); + + apm->AttachAecDump(std::move(mock_aec_dump)); + apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data()); +} diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc new file mode 100644 index 0000000000..62f896fe14 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include + +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "rtc_base/task_queue_for_test.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +TEST(AecDumper, APICallsDoNotCrash) { + // Note order of initialization: Task queue has to be initialized + // before AecDump. + webrtc::TaskQueueForTest file_writer_queue("file_writer_queue"); + + const std::string filename = + webrtc::test::TempFilename(webrtc::test::OutputPath(), "aec_dump"); + + { + std::unique_ptr aec_dump = + webrtc::AecDumpFactory::Create(filename, -1, &file_writer_queue); + + constexpr int kNumChannels = 1; + constexpr int kNumSamplesPerChannel = 160; + std::array frame; + frame.fill(0.f); + aec_dump->WriteRenderStreamMessage(frame.data(), kNumChannels, + kNumSamplesPerChannel); + + aec_dump->AddCaptureStreamInput(frame.data(), kNumChannels, + kNumSamplesPerChannel); + aec_dump->AddCaptureStreamOutput(frame.data(), kNumChannels, + kNumSamplesPerChannel); + + aec_dump->WriteCaptureStreamMessage(); + + webrtc::InternalAPMConfig apm_config; + aec_dump->WriteConfig(apm_config); + + webrtc::ProcessingConfig api_format; + constexpr int64_t kTimeNowMs = 123456789ll; + aec_dump->WriteInitMessage(api_format, kTimeNowMs); + } + // Remove file after the AecDump d-tor has finished. + ASSERT_EQ(0, remove(filename.c_str())); +} + +TEST(AecDumper, WriteToFile) { + webrtc::TaskQueueForTest file_writer_queue("file_writer_queue"); + + const std::string filename = + webrtc::test::TempFilename(webrtc::test::OutputPath(), "aec_dump"); + + { + std::unique_ptr aec_dump = + webrtc::AecDumpFactory::Create(filename, -1, &file_writer_queue); + + constexpr int kNumChannels = 1; + constexpr int kNumSamplesPerChannel = 160; + std::array frame; + frame.fill(0.f); + + aec_dump->WriteRenderStreamMessage(frame.data(), kNumChannels, + kNumSamplesPerChannel); + } + + // Verify the file has been written after the AecDump d-tor has + // finished. + FILE* fid = fopen(filename.c_str(), "r"); + ASSERT_TRUE(fid != NULL); + + // Clean it up. + ASSERT_EQ(0, fclose(fid)); + ASSERT_EQ(0, remove(filename.c_str())); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.cc new file mode 100644 index 0000000000..7d82a39729 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec_dump/capture_stream_info.h" + +namespace webrtc { + +void CaptureStreamInfo::AddInput(const AudioFrameView& src) { + auto* stream = event_->mutable_stream(); + + for (int i = 0; i < src.num_channels(); ++i) { + const auto& channel_view = src.channel(i); + stream->add_input_channel(channel_view.begin(), + sizeof(float) * channel_view.size()); + } +} + +void CaptureStreamInfo::AddOutput(const AudioFrameView& src) { + auto* stream = event_->mutable_stream(); + + for (int i = 0; i < src.num_channels(); ++i) { + const auto& channel_view = src.channel(i); + stream->add_output_channel(channel_view.begin(), + sizeof(float) * channel_view.size()); + } +} + +void CaptureStreamInfo::AddInput(const int16_t* const data, + int num_channels, + int samples_per_channel) { + auto* stream = event_->mutable_stream(); + const size_t data_size = sizeof(int16_t) * samples_per_channel * num_channels; + stream->set_input_data(data, data_size); +} + +void CaptureStreamInfo::AddOutput(const int16_t* const data, + int num_channels, + int samples_per_channel) { + auto* stream = event_->mutable_stream(); + const size_t data_size = sizeof(int16_t) * samples_per_channel * num_channels; + stream->set_output_data(data, data_size); +} + +void CaptureStreamInfo::AddAudioProcessingState( + const AecDump::AudioProcessingState& state) { + auto* stream = event_->mutable_stream(); + stream->set_delay(state.delay); + stream->set_drift(state.drift); + if (state.applied_input_volume.has_value()) { + stream->set_applied_input_volume(*state.applied_input_volume); + } + stream->set_keypress(state.keypress); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.h b/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.h new file mode 100644 index 0000000000..0819bbcb23 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/capture_stream_info.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_CAPTURE_STREAM_INFO_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_CAPTURE_STREAM_INFO_H_ + +#include +#include + +#include "modules/audio_processing/include/aec_dump.h" +#include "rtc_base/ignore_wundef.h" + +// Files generated at build-time by the protobuf compiler. 
+RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" +#else +#include "modules/audio_processing/debug.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { + +class CaptureStreamInfo { + public: + CaptureStreamInfo() { CreateNewEvent(); } + CaptureStreamInfo(const CaptureStreamInfo&) = delete; + CaptureStreamInfo& operator=(const CaptureStreamInfo&) = delete; + ~CaptureStreamInfo() = default; + + void AddInput(const AudioFrameView& src); + void AddOutput(const AudioFrameView& src); + + void AddInput(const int16_t* const data, + int num_channels, + int samples_per_channel); + void AddOutput(const int16_t* const data, + int num_channels, + int samples_per_channel); + + void AddAudioProcessingState(const AecDump::AudioProcessingState& state); + + std::unique_ptr FetchEvent() { + std::unique_ptr result = std::move(event_); + CreateNewEvent(); + return result; + } + + private: + void CreateNewEvent() { + event_ = std::make_unique(); + event_->set_type(audioproc::Event::STREAM); + } + std::unique_ptr event_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_CAPTURE_STREAM_INFO_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc new file mode 100644 index 0000000000..fe35d81db9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec_dump/mock_aec_dump.h" + +namespace webrtc { + +namespace test { + +MockAecDump::MockAecDump() = default; +MockAecDump::~MockAecDump() = default; +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.h b/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.h new file mode 100644 index 0000000000..b396739de4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/mock_aec_dump.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_MOCK_AEC_DUMP_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_MOCK_AEC_DUMP_H_ + +#include + +#include "modules/audio_processing/include/aec_dump.h" +#include "test/gmock.h" + +namespace webrtc { + +namespace test { + +class MockAecDump : public AecDump { + public: + MockAecDump(); + virtual ~MockAecDump(); + + MOCK_METHOD(void, + WriteInitMessage, + (const ProcessingConfig& api_format, int64_t time_now_ms), + (override)); + + MOCK_METHOD(void, + AddCaptureStreamInput, + (const AudioFrameView& src), + (override)); + MOCK_METHOD(void, + AddCaptureStreamOutput, + (const AudioFrameView& src), + (override)); + MOCK_METHOD(void, + AddCaptureStreamInput, + (const int16_t* const data, + int num_channels, + int samples_per_channel), + (override)); + MOCK_METHOD(void, + AddCaptureStreamOutput, + (const int16_t* const data, + int num_channels, + int samples_per_channel), + (override)); + MOCK_METHOD(void, + AddAudioProcessingState, + (const AudioProcessingState& state), + (override)); + MOCK_METHOD(void, WriteCaptureStreamMessage, (), (override)); + + MOCK_METHOD(void, + WriteRenderStreamMessage, + (const int16_t* const data, + int num_channels, + int samples_per_channel), + (override)); + MOCK_METHOD(void, + WriteRenderStreamMessage, + (const AudioFrameView& src), + (override)); + + MOCK_METHOD(void, WriteConfig, (const InternalAPMConfig& config), (override)); + + MOCK_METHOD(void, + WriteRuntimeSetting, + (const AudioProcessing::RuntimeSetting& config), + (override)); +}; + +} // namespace test + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_MOCK_AEC_DUMP_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc b/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc new file mode 100644 index 0000000000..9bd9745069 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "modules/audio_processing/include/aec_dump.h" + +namespace webrtc { + +std::unique_ptr AecDumpFactory::Create(webrtc::FileWrapper file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return nullptr; +} + +std::unique_ptr AecDumpFactory::Create(absl::string_view file_name, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return nullptr; +} + +std::unique_ptr AecDumpFactory::Create(FILE* handle, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return nullptr; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build new file mode 100644 index 0000000000..974b70b087 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + 
DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("null_aec_dump_factory_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aec_dump_interface_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aec_dump_interface_gn/moz.build new file mode 100644 index 0000000000..1c47bbd5cc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec_dump_interface_gn/moz.build @@ -0,0 +1,225 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("aec_dump_interface_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/aecm/BUILD.gn new file mode 100644 index 0000000000..a77f04aba5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/BUILD.gn @@ -0,0 +1,44 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") + +rtc_library("aecm_core") { + sources = [ + "aecm_core.cc", + "aecm_core.h", + "aecm_defines.h", + "echo_control_mobile.cc", + "echo_control_mobile.h", + ] + deps = [ + "../../../common_audio:common_audio_c", + "../../../rtc_base:checks", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:sanitizer", + "../../../system_wrappers", + "../utility:legacy_delay_estimator", + ] + cflags = [] + + if (rtc_build_with_neon) { + sources += [ "aecm_core_neon.cc" ] + + if (target_cpu != "arm64") { + # Enable compilation for the NEON instruction set. 
+ suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags += [ "-mfpu=neon" ] + } + } + + if (target_cpu == "mipsel") { + sources += [ "aecm_core_mips.cc" ] + } else { + sources += [ "aecm_core_c.cc" ] + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc new file mode 100644 index 0000000000..fbc3239732 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc @@ -0,0 +1,1125 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aecm/aecm_core.h" + +#include +#include +#include + +extern "C" { +#include "common_audio/ring_buffer.h" +#include "common_audio/signal_processing/include/real_fft.h" +} +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +namespace { + +#ifdef AEC_DEBUG +FILE* dfile; +FILE* testfile; +#endif + +// Initialization table for echo channel in 8 kHz +static const int16_t kChannelStored8kHz[PART_LEN1] = { + 2040, 1815, 1590, 1498, 1405, 1395, 1385, 1418, 1451, 1506, 1562, + 1644, 1726, 1804, 1882, 1918, 1953, 1982, 2010, 2025, 2040, 2034, + 2027, 2021, 2014, 1997, 1980, 1925, 1869, 1800, 1732, 1683, 1635, + 1604, 1572, 1545, 1517, 1481, 1444, 1405, 1367, 1331, 1294, 1270, + 1245, 1239, 1233, 1247, 1260, 1282, 1303, 1338, 1373, 1407, 1441, + 1470, 1499, 1524, 1549, 1565, 1582, 1601, 1621, 1649, 1676}; + +// Initialization table for echo channel in 16 kHz +static const int16_t kChannelStored16kHz[PART_LEN1] = { + 2040, 1590, 1405, 1385, 1451, 1562, 1726, 1882, 1953, 2010, 2040, + 2027, 2014, 1980, 1869, 1732, 1635, 1572, 1517, 1444, 1367, 1294, + 1245, 1233, 1260, 1303, 1373, 1441, 1499, 1549, 1582, 1621, 1676, + 1741, 1802, 1861, 1921, 1983, 2040, 2102, 2170, 2265, 2375, 2515, + 2651, 2781, 2922, 3075, 3253, 3471, 3738, 3976, 4151, 4258, 4308, + 4288, 4270, 4253, 4237, 4179, 4086, 3947, 3757, 3484, 3153}; + +} // namespace + +const int16_t WebRtcAecm_kCosTable[] = { + 8192, 8190, 8187, 8180, 8172, 8160, 8147, 8130, 8112, 8091, 8067, + 8041, 8012, 7982, 7948, 7912, 7874, 7834, 7791, 7745, 7697, 7647, + 7595, 7540, 7483, 7424, 7362, 7299, 7233, 7164, 7094, 7021, 6947, + 6870, 6791, 6710, 6627, 6542, 6455, 6366, 6275, 6182, 6087, 5991, + 5892, 5792, 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930, 4815, + 4698, 4580, 4461, 4341, 4219, 4096, 3971, 3845, 3719, 3591, 3462, + 3331, 3200, 3068, 2935, 2801, 2667, 2531, 2395, 2258, 2120, 1981, + 1842, 1703, 1563, 1422, 1281, 1140, 998, 856, 713, 571, 428, + 285, 142, 0, -142, -285, -428, -571, -713, -856, -998, -1140, + -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395, -2531, -2667, + -2801, -2935, -3068, -3200, -3331, -3462, -3591, -3719, -3845, -3971, -4095, + -4219, -4341, -4461, -4580, -4698, -4815, -4930, -5043, -5155, -5265, -5374, + -5481, -5586, -5690, -5792, -5892, -5991, -6087, -6182, -6275, -6366, -6455, + 
-6542, -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164, -7233, -7299, + -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745, -7791, -7834, -7874, + -7912, -7948, -7982, -8012, -8041, -8067, -8091, -8112, -8130, -8147, -8160, + -8172, -8180, -8187, -8190, -8191, -8190, -8187, -8180, -8172, -8160, -8147, + -8130, -8112, -8091, -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834, + -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362, -7299, -7233, + -7164, -7094, -7021, -6947, -6870, -6791, -6710, -6627, -6542, -6455, -6366, + -6275, -6182, -6087, -5991, -5892, -5792, -5690, -5586, -5481, -5374, -5265, + -5155, -5043, -4930, -4815, -4698, -4580, -4461, -4341, -4219, -4096, -3971, + -3845, -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, -2667, -2531, + -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422, -1281, -1140, -998, + -856, -713, -571, -428, -285, -142, 0, 142, 285, 428, 571, + 713, 856, 998, 1140, 1281, 1422, 1563, 1703, 1842, 1981, 2120, + 2258, 2395, 2531, 2667, 2801, 2935, 3068, 3200, 3331, 3462, 3591, + 3719, 3845, 3971, 4095, 4219, 4341, 4461, 4580, 4698, 4815, 4930, + 5043, 5155, 5265, 5374, 5481, 5586, 5690, 5792, 5892, 5991, 6087, + 6182, 6275, 6366, 6455, 6542, 6627, 6710, 6791, 6870, 6947, 7021, + 7094, 7164, 7233, 7299, 7362, 7424, 7483, 7540, 7595, 7647, 7697, + 7745, 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041, 8067, 8091, + 8112, 8130, 8147, 8160, 8172, 8180, 8187, 8190}; + +const int16_t WebRtcAecm_kSinTable[] = { + 0, 142, 285, 428, 571, 713, 856, 998, 1140, 1281, 1422, + 1563, 1703, 1842, 1981, 2120, 2258, 2395, 2531, 2667, 2801, 2935, + 3068, 3200, 3331, 3462, 3591, 3719, 3845, 3971, 4095, 4219, 4341, + 4461, 4580, 4698, 4815, 4930, 5043, 5155, 5265, 5374, 5481, 5586, + 5690, 5792, 5892, 5991, 6087, 6182, 6275, 6366, 6455, 6542, 6627, + 6710, 6791, 6870, 6947, 7021, 7094, 7164, 7233, 7299, 7362, 7424, + 7483, 7540, 7595, 7647, 7697, 7745, 7791, 7834, 7874, 7912, 7948, + 7982, 8012, 8041, 8067, 8091, 8112, 8130, 8147, 8160, 8172, 8180, + 8187, 8190, 8191, 8190, 8187, 8180, 8172, 8160, 8147, 8130, 8112, + 8091, 8067, 8041, 8012, 7982, 7948, 7912, 7874, 7834, 7791, 7745, + 7697, 7647, 7595, 7540, 7483, 7424, 7362, 7299, 7233, 7164, 7094, + 7021, 6947, 6870, 6791, 6710, 6627, 6542, 6455, 6366, 6275, 6182, + 6087, 5991, 5892, 5792, 5690, 5586, 5481, 5374, 5265, 5155, 5043, + 4930, 4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971, 3845, 3719, + 3591, 3462, 3331, 3200, 3068, 2935, 2801, 2667, 2531, 2395, 2258, + 2120, 1981, 1842, 1703, 1563, 1422, 1281, 1140, 998, 856, 713, + 571, 428, 285, 142, 0, -142, -285, -428, -571, -713, -856, + -998, -1140, -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395, + -2531, -2667, -2801, -2935, -3068, -3200, -3331, -3462, -3591, -3719, -3845, + -3971, -4095, -4219, -4341, -4461, -4580, -4698, -4815, -4930, -5043, -5155, + -5265, -5374, -5481, -5586, -5690, -5792, -5892, -5991, -6087, -6182, -6275, + -6366, -6455, -6542, -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164, + -7233, -7299, -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745, -7791, + -7834, -7874, -7912, -7948, -7982, -8012, -8041, -8067, -8091, -8112, -8130, + -8147, -8160, -8172, -8180, -8187, -8190, -8191, -8190, -8187, -8180, -8172, + -8160, -8147, -8130, -8112, -8091, -8067, -8041, -8012, -7982, -7948, -7912, + -7874, -7834, -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362, + -7299, -7233, -7164, -7094, -7021, -6947, -6870, -6791, -6710, -6627, -6542, + -6455, -6366, -6275, -6182, -6087, -5991, -5892, 
-5792, -5690, -5586, -5481, + -5374, -5265, -5155, -5043, -4930, -4815, -4698, -4580, -4461, -4341, -4219, + -4096, -3971, -3845, -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, + -2667, -2531, -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422, -1281, + -1140, -998, -856, -713, -571, -428, -285, -142}; + + +// Moves the pointer to the next entry and inserts `far_spectrum` and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q) { + // Get new buffer position + self->far_history_pos++; + if (self->far_history_pos >= MAX_DELAY) { + self->far_history_pos = 0; + } + // Update Q-domain buffer + self->far_q_domains[self->far_history_pos] = far_q; + // Update far end spectrum buffer + memcpy(&(self->far_history[self->far_history_pos * PART_LEN1]), far_spectrum, + sizeof(uint16_t) * PART_LEN1); +} + +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, + int* far_q, + int delay) { + int buffer_position = 0; + RTC_DCHECK(self); + buffer_position = self->far_history_pos - delay; + + // Check buffer position + if (buffer_position < 0) { + buffer_position += MAX_DELAY; + } + // Get Q-domain + *far_q = self->far_q_domains[buffer_position]; + // Return far end spectrum + return &(self->far_history[buffer_position * PART_LEN1]); +} + +// Declare function pointers. +CalcLinearEnergies WebRtcAecm_CalcLinearEnergies; +StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel; +ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel; + +AecmCore* WebRtcAecm_CreateCore() { + // Allocate zero-filled memory. 
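+  // The zero fill from calloc() matters here: every counter, Q-domain
+  // tracker and history buffer in AecmCore starts at 0 until
+  // WebRtcAecm_InitCore() assigns its real initial value (illustrative
+  // note; see InitCore below).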
+  AecmCore* aecm = static_cast<AecmCore*>(calloc(1, sizeof(AecmCore)));
+
+  aecm->farFrameBuf =
+      WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  if (!aecm->farFrameBuf) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  aecm->nearNoisyFrameBuf =
+      WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  if (!aecm->nearNoisyFrameBuf) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  aecm->nearCleanFrameBuf =
+      WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  if (!aecm->nearCleanFrameBuf) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  aecm->outFrameBuf =
+      WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  if (!aecm->outFrameBuf) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  aecm->delay_estimator_farend =
+      WebRtc_CreateDelayEstimatorFarend(PART_LEN1, MAX_DELAY);
+  if (aecm->delay_estimator_farend == NULL) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+  aecm->delay_estimator =
+      WebRtc_CreateDelayEstimator(aecm->delay_estimator_farend, 0);
+  if (aecm->delay_estimator == NULL) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+  // TODO(bjornv): Explicitly disable robust delay validation until no
+  // performance regression has been established. Then remove the line.
+  WebRtc_enable_robust_validation(aecm->delay_estimator, 0);
+
+  aecm->real_fft = WebRtcSpl_CreateRealFFT(PART_LEN_SHIFT);
+  if (aecm->real_fft == NULL) {
+    WebRtcAecm_FreeCore(aecm);
+    return NULL;
+  }
+
+  // Init some aecm pointers. 16 and 32 byte alignment is only necessary
+  // for Neon code currently.
+  aecm->xBuf = (int16_t*)(((uintptr_t)aecm->xBuf_buf + 31) & ~31);
+  aecm->dBufClean = (int16_t*)(((uintptr_t)aecm->dBufClean_buf + 31) & ~31);
+  aecm->dBufNoisy = (int16_t*)(((uintptr_t)aecm->dBufNoisy_buf + 31) & ~31);
+  aecm->outBuf = (int16_t*)(((uintptr_t)aecm->outBuf_buf + 15) & ~15);
+  aecm->channelStored =
+      (int16_t*)(((uintptr_t)aecm->channelStored_buf + 15) & ~15);
+  aecm->channelAdapt16 =
+      (int16_t*)(((uintptr_t)aecm->channelAdapt16_buf + 15) & ~15);
+  aecm->channelAdapt32 =
+      (int32_t*)(((uintptr_t)aecm->channelAdapt32_buf + 31) & ~31);
+
+  return aecm;
+}
+
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path) {
+  int i = 0;
+
+  // Reset the stored channel
+  memcpy(aecm->channelStored, echo_path, sizeof(int16_t) * PART_LEN1);
+  // Reset the adapted channels
+  memcpy(aecm->channelAdapt16, echo_path, sizeof(int16_t) * PART_LEN1);
+  for (i = 0; i < PART_LEN1; i++) {
+    aecm->channelAdapt32[i] = (int32_t)aecm->channelAdapt16[i] << 16;
+  }
+
+  // Reset channel storing variables
+  aecm->mseAdaptOld = 1000;
+  aecm->mseStoredOld = 1000;
+  aecm->mseThreshold = WEBRTC_SPL_WORD32_MAX;
+  aecm->mseChannelCount = 0;
+}
+
+static void CalcLinearEnergiesC(AecmCore* aecm,
+                                const uint16_t* far_spectrum,
+                                int32_t* echo_est,
+                                uint32_t* far_energy,
+                                uint32_t* echo_energy_adapt,
+                                uint32_t* echo_energy_stored) {
+  int i;
+
+  // Get energy for the delayed far end signal and estimated
+  // echo using both stored and adapted channels.
+  for (i = 0; i < PART_LEN1; i++) {
+    echo_est[i] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
+    (*far_energy) += (uint32_t)(far_spectrum[i]);
+    *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
+    (*echo_energy_stored) += (uint32_t)echo_est[i];
+  }
+}
+
+static void StoreAdaptiveChannelC(AecmCore* aecm,
+                                  const uint16_t* far_spectrum,
+                                  int32_t* echo_est) {
+  int i;
+
+  // During startup we store the channel every block.
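+  // Q-domain sketch (derived from the call sites in this file):
+  // channelStored is in Q(RESOLUTION_CHANNEL16) and far_spectrum in
+  // Q(far_q), so each echo_est[i] product below lands in
+  // Q(RESOLUTION_CHANNEL16 + far_q), matching the `echoEst` documentation
+  // of WebRtcAecm_UpdateChannel().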
+  memcpy(aecm->channelStored, aecm->channelAdapt16,
+         sizeof(int16_t) * PART_LEN1);
+  // Recalculate echo estimate
+  for (i = 0; i < PART_LEN; i += 4) {
+    echo_est[i] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
+    echo_est[i + 1] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], far_spectrum[i + 1]);
+    echo_est[i + 2] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], far_spectrum[i + 2]);
+    echo_est[i + 3] =
+        WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], far_spectrum[i + 3]);
+  }
+  echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
+}
+
+static void ResetAdaptiveChannelC(AecmCore* aecm) {
+  int i;
+
+  // The stored channel has a significantly lower MSE than the adaptive one for
+  // two consecutive calculations. Reset the adaptive channel.
+  memcpy(aecm->channelAdapt16, aecm->channelStored,
+         sizeof(int16_t) * PART_LEN1);
+  // Restore the W32 channel
+  for (i = 0; i < PART_LEN; i += 4) {
+    aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
+    aecm->channelAdapt32[i + 1] = (int32_t)aecm->channelStored[i + 1] << 16;
+    aecm->channelAdapt32[i + 2] = (int32_t)aecm->channelStored[i + 2] << 16;
+    aecm->channelAdapt32[i + 3] = (int32_t)aecm->channelStored[i + 3] << 16;
+  }
+  aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;
+}
+
+// Initialize function pointers for ARM Neon platform.
+#if defined(WEBRTC_HAS_NEON)
+static void WebRtcAecm_InitNeon(void) {
+  WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon;
+  WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon;
+  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
+}
+#endif
+
+// Initialize function pointers for MIPS platform.
+#if defined(MIPS32_LE)
+static void WebRtcAecm_InitMips(void) {
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannel_mips;
+  WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannel_mips;
+#endif
+  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergies_mips;
+}
+#endif
+
+// WebRtcAecm_InitCore(...)
+//
+// This function initializes the AECM instance created with
+// WebRtcAecm_CreateCore(...)
Input: +// - aecm : Pointer to the Echo Suppression instance +// - samplingFreq : Sampling Frequency +// +// Output: +// - aecm : Initialized instance +// +// Return value : 0 - Ok +// -1 - Error +// +int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq) { + int i = 0; + int32_t tmp32 = PART_LEN1 * PART_LEN1; + int16_t tmp16 = PART_LEN1; + + if (samplingFreq != 8000 && samplingFreq != 16000) { + samplingFreq = 8000; + return -1; + } + // sanity check of sampling frequency + aecm->mult = (int16_t)samplingFreq / 8000; + + aecm->farBufWritePos = 0; + aecm->farBufReadPos = 0; + aecm->knownDelay = 0; + aecm->lastKnownDelay = 0; + + WebRtc_InitBuffer(aecm->farFrameBuf); + WebRtc_InitBuffer(aecm->nearNoisyFrameBuf); + WebRtc_InitBuffer(aecm->nearCleanFrameBuf); + WebRtc_InitBuffer(aecm->outFrameBuf); + + memset(aecm->xBuf_buf, 0, sizeof(aecm->xBuf_buf)); + memset(aecm->dBufClean_buf, 0, sizeof(aecm->dBufClean_buf)); + memset(aecm->dBufNoisy_buf, 0, sizeof(aecm->dBufNoisy_buf)); + memset(aecm->outBuf_buf, 0, sizeof(aecm->outBuf_buf)); + + aecm->seed = 666; + aecm->totCount = 0; + + if (WebRtc_InitDelayEstimatorFarend(aecm->delay_estimator_farend) != 0) { + return -1; + } + if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) { + return -1; + } + // Set far end histories to zero + memset(aecm->far_history, 0, sizeof(uint16_t) * PART_LEN1 * MAX_DELAY); + memset(aecm->far_q_domains, 0, sizeof(int) * MAX_DELAY); + aecm->far_history_pos = MAX_DELAY; + + aecm->nlpFlag = 1; + aecm->fixedDelay = -1; + + aecm->dfaCleanQDomain = 0; + aecm->dfaCleanQDomainOld = 0; + aecm->dfaNoisyQDomain = 0; + aecm->dfaNoisyQDomainOld = 0; + + memset(aecm->nearLogEnergy, 0, sizeof(aecm->nearLogEnergy)); + aecm->farLogEnergy = 0; + memset(aecm->echoAdaptLogEnergy, 0, sizeof(aecm->echoAdaptLogEnergy)); + memset(aecm->echoStoredLogEnergy, 0, sizeof(aecm->echoStoredLogEnergy)); + + // Initialize the echo channels with a stored shape. + if (samplingFreq == 8000) { + WebRtcAecm_InitEchoPathCore(aecm, kChannelStored8kHz); + } else { + WebRtcAecm_InitEchoPathCore(aecm, kChannelStored16kHz); + } + + memset(aecm->echoFilt, 0, sizeof(aecm->echoFilt)); + memset(aecm->nearFilt, 0, sizeof(aecm->nearFilt)); + aecm->noiseEstCtr = 0; + + aecm->cngMode = AecmTrue; + + memset(aecm->noiseEstTooLowCtr, 0, sizeof(aecm->noiseEstTooLowCtr)); + memset(aecm->noiseEstTooHighCtr, 0, sizeof(aecm->noiseEstTooHighCtr)); + // Shape the initial noise level to an approximate pink noise. + for (i = 0; i < (PART_LEN1 >> 1) - 1; i++) { + aecm->noiseEst[i] = (tmp32 << 8); + tmp16--; + tmp32 -= (int32_t)((tmp16 << 1) + 1); + } + for (; i < PART_LEN1; i++) { + aecm->noiseEst[i] = (tmp32 << 8); + } + + aecm->farEnergyMin = WEBRTC_SPL_WORD16_MAX; + aecm->farEnergyMax = WEBRTC_SPL_WORD16_MIN; + aecm->farEnergyMaxMin = 0; + aecm->farEnergyVAD = FAR_ENERGY_MIN; // This prevents false speech detection + // at the beginning. + aecm->farEnergyMSE = 0; + aecm->currentVADValue = 0; + aecm->vadUpdateCount = 0; + aecm->firstVAD = 1; + + aecm->startupState = 0; + aecm->supGain = SUPGAIN_DEFAULT; + aecm->supGainOld = SUPGAIN_DEFAULT; + + aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A; + aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D; + aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B; + aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D; + + // Assert a preprocessor definition at compile-time. It's an assumption + // used in assembly code, so check the assembly files before any change. 
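+  // Illustrative note: with PART_LEN = 64 (see aecm_defines.h), a NEON
+  // implementation can walk each block in whole 8-lane int16_t vectors with
+  // no scalar tail; the assertion below guards that kind of layout
+  // assumption.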
+ static_assert(PART_LEN % 16 == 0, "PART_LEN is not a multiple of 16"); + + // Initialize function pointers. + WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC; + WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC; + WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC; + +#if defined(WEBRTC_HAS_NEON) + WebRtcAecm_InitNeon(); +#endif + +#if defined(MIPS32_LE) + WebRtcAecm_InitMips(); +#endif + return 0; +} + +// TODO(bjornv): This function is currently not used. Add support for these +// parameters from a higher level +int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag) { + aecm->nlpFlag = nlpFlag; + aecm->fixedDelay = delay; + + return 0; +} + +void WebRtcAecm_FreeCore(AecmCore* aecm) { + if (aecm == NULL) { + return; + } + + WebRtc_FreeBuffer(aecm->farFrameBuf); + WebRtc_FreeBuffer(aecm->nearNoisyFrameBuf); + WebRtc_FreeBuffer(aecm->nearCleanFrameBuf); + WebRtc_FreeBuffer(aecm->outFrameBuf); + + WebRtc_FreeDelayEstimator(aecm->delay_estimator); + WebRtc_FreeDelayEstimatorFarend(aecm->delay_estimator_farend); + WebRtcSpl_FreeRealFFT(aecm->real_fft); + + free(aecm); +} + +int WebRtcAecm_ProcessFrame(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out) { + int16_t outBlock_buf[PART_LEN + 8]; // Align buffer to 8-byte boundary. + int16_t* outBlock = (int16_t*)(((uintptr_t)outBlock_buf + 15) & ~15); + + int16_t farFrame[FRAME_LEN]; + const int16_t* out_ptr = NULL; + int size = 0; + + // Buffer the current frame. + // Fetch an older one corresponding to the delay. + WebRtcAecm_BufferFarFrame(aecm, farend, FRAME_LEN); + WebRtcAecm_FetchFarFrame(aecm, farFrame, FRAME_LEN, aecm->knownDelay); + + // Buffer the synchronized far and near frames, + // to pass the smaller blocks individually. + WebRtc_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN); + WebRtc_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN); + if (nearendClean != NULL) { + WebRtc_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN); + } + + // Process as many blocks as possible. + while (WebRtc_available_read(aecm->farFrameBuf) >= PART_LEN) { + int16_t far_block[PART_LEN]; + const int16_t* far_block_ptr = NULL; + int16_t near_noisy_block[PART_LEN]; + const int16_t* near_noisy_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->farFrameBuf, (void**)&far_block_ptr, far_block, + PART_LEN); + WebRtc_ReadBuffer(aecm->nearNoisyFrameBuf, (void**)&near_noisy_block_ptr, + near_noisy_block, PART_LEN); + if (nearendClean != NULL) { + int16_t near_clean_block[PART_LEN]; + const int16_t* near_clean_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->nearCleanFrameBuf, (void**)&near_clean_block_ptr, + near_clean_block, PART_LEN); + if (WebRtcAecm_ProcessBlock(aecm, far_block_ptr, near_noisy_block_ptr, + near_clean_block_ptr, outBlock) == -1) { + return -1; + } + } else { + if (WebRtcAecm_ProcessBlock(aecm, far_block_ptr, near_noisy_block_ptr, + NULL, outBlock) == -1) { + return -1; + } + } + + WebRtc_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN); + } + + // Stuff the out buffer if we have less than a frame to output. + // This should only happen for the first frame. + size = (int)WebRtc_available_read(aecm->outFrameBuf); + if (size < FRAME_LEN) { + WebRtc_MoveReadPtr(aecm->outFrameBuf, size - FRAME_LEN); + } + + // Obtain an output frame. + WebRtc_ReadBuffer(aecm->outFrameBuf, (void**)&out_ptr, out, FRAME_LEN); + if (out_ptr != out) { + // ReadBuffer() hasn't copied to `out` in this case. 
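+    // (Contract sketch, from common_audio/ring_buffer.h: WebRtc_ReadBuffer()
+    // may set `out_ptr` to point straight into its internal storage instead
+    // of copying into `out` when the requested region is contiguous, so the
+    // data has to be copied manually here.)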
+ memcpy(out, out_ptr, FRAME_LEN * sizeof(int16_t)); + } + + return 0; +} + +// WebRtcAecm_AsymFilt(...) +// +// Performs asymmetric filtering. +// +// Inputs: +// - filtOld : Previous filtered value. +// - inVal : New input value. +// - stepSizePos : Step size when we have a positive contribution. +// - stepSizeNeg : Step size when we have a negative contribution. +// +// Output: +// +// Return: - Filtered value. +// +int16_t WebRtcAecm_AsymFilt(const int16_t filtOld, + const int16_t inVal, + const int16_t stepSizePos, + const int16_t stepSizeNeg) { + int16_t retVal; + + if ((filtOld == WEBRTC_SPL_WORD16_MAX) | (filtOld == WEBRTC_SPL_WORD16_MIN)) { + return inVal; + } + retVal = filtOld; + if (filtOld > inVal) { + retVal -= (filtOld - inVal) >> stepSizeNeg; + } else { + retVal += (inVal - filtOld) >> stepSizePos; + } + + return retVal; +} + +// ExtractFractionPart(a, zeros) +// +// returns the fraction part of `a`, with `zeros` number of leading zeros, as an +// int16_t scaled to Q8. There is no sanity check of `a` in the sense that the +// number of zeros match. +static int16_t ExtractFractionPart(uint32_t a, int zeros) { + return (int16_t)(((a << zeros) & 0x7FFFFFFF) >> 23); +} + +// Calculates and returns the log of `energy` in Q8. The input `energy` is +// supposed to be in Q(`q_domain`). +static int16_t LogOfEnergyInQ8(uint32_t energy, int q_domain) { + static const int16_t kLogLowValue = PART_LEN_SHIFT << 7; + int16_t log_energy_q8 = kLogLowValue; + if (energy > 0) { + int zeros = WebRtcSpl_NormU32(energy); + int16_t frac = ExtractFractionPart(energy, zeros); + // log2 of `energy` in Q8. + log_energy_q8 += ((31 - zeros) << 8) + frac - (q_domain << 8); + } + return log_energy_q8; +} + +// WebRtcAecm_CalcEnergies(...) +// +// This function calculates the log of energies for nearend, farend and +// estimated echoes. There is also an update of energy decision levels, i.e. +// internal VAD. +// +// +// @param aecm [i/o] Handle of the AECM instance. +// @param far_spectrum [in] Pointer to farend spectrum. +// @param far_q [in] Q-domain of farend spectrum. +// @param nearEner [in] Near end energy for current block in +// Q(aecm->dfaQDomain). +// @param echoEst [out] Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). 
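+// @note Illustrative example of the Q8 log domain used below: an energy of
+//       2^20 in Q0 makes LogOfEnergyInQ8() return its constant low-value
+//       offset plus (20 << 8) = 5120, i.e. log2(energy) scaled by 256, with
+//       a Q8 fractional part taken from the mantissa.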
+// +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint32_t nearEner, + int32_t* echoEst) { + // Local variables + uint32_t tmpAdapt = 0; + uint32_t tmpStored = 0; + uint32_t tmpFar = 0; + + int i; + + int16_t tmp16; + int16_t increase_max_shifts = 4; + int16_t decrease_max_shifts = 11; + int16_t increase_min_shifts = 11; + int16_t decrease_min_shifts = 3; + + // Get log of near end energy and store in buffer + + // Shift buffer + memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + + // Logarithm of integrated magnitude spectrum (nearEner) + aecm->nearLogEnergy[0] = LogOfEnergyInQ8(nearEner, aecm->dfaNoisyQDomain); + + WebRtcAecm_CalcLinearEnergies(aecm, far_spectrum, echoEst, &tmpFar, &tmpAdapt, + &tmpStored); + + // Shift buffers + memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + + // Logarithm of delayed far end energy + aecm->farLogEnergy = LogOfEnergyInQ8(tmpFar, far_q); + + // Logarithm of estimated echo energy through adapted channel + aecm->echoAdaptLogEnergy[0] = + LogOfEnergyInQ8(tmpAdapt, RESOLUTION_CHANNEL16 + far_q); + + // Logarithm of estimated echo energy through stored channel + aecm->echoStoredLogEnergy[0] = + LogOfEnergyInQ8(tmpStored, RESOLUTION_CHANNEL16 + far_q); + + // Update farend energy levels (min, max, vad, mse) + if (aecm->farLogEnergy > FAR_ENERGY_MIN) { + if (aecm->startupState == 0) { + increase_max_shifts = 2; + decrease_min_shifts = 2; + increase_min_shifts = 8; + } + + aecm->farEnergyMin = + WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy, + increase_min_shifts, decrease_min_shifts); + aecm->farEnergyMax = + WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy, + increase_max_shifts, decrease_max_shifts); + aecm->farEnergyMaxMin = (aecm->farEnergyMax - aecm->farEnergyMin); + + // Dynamic VAD region size + tmp16 = 2560 - aecm->farEnergyMin; + if (tmp16 > 0) { + tmp16 = (int16_t)((tmp16 * FAR_ENERGY_VAD_REGION) >> 9); + } else { + tmp16 = 0; + } + tmp16 += FAR_ENERGY_VAD_REGION; + + if ((aecm->startupState == 0) | (aecm->vadUpdateCount > 1024)) { + // In startup phase or VAD update halted + aecm->farEnergyVAD = aecm->farEnergyMin + tmp16; + } else { + if (aecm->farEnergyVAD > aecm->farLogEnergy) { + aecm->farEnergyVAD += + (aecm->farLogEnergy + tmp16 - aecm->farEnergyVAD) >> 6; + aecm->vadUpdateCount = 0; + } else { + aecm->vadUpdateCount++; + } + } + // Put MSE threshold higher than VAD + aecm->farEnergyMSE = aecm->farEnergyVAD + (1 << 8); + } + + // Update VAD variables + if (aecm->farLogEnergy > aecm->farEnergyVAD) { + if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF)) { + // We are in startup or have significant dynamics in input speech level + aecm->currentVADValue = 1; + } + } else { + aecm->currentVADValue = 0; + } + if ((aecm->currentVADValue) && (aecm->firstVAD)) { + aecm->firstVAD = 0; + if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0]) { + // The estimated echo has higher energy than the near end signal. + // This means that the initialization was too aggressive. Scale + // down by a factor 8 + for (i = 0; i < PART_LEN1; i++) { + aecm->channelAdapt16[i] >>= 3; + } + // Compensate the adapted echo energy level accordingly. + aecm->echoAdaptLogEnergy[0] -= (3 << 8); + aecm->firstVAD = 1; + } + } +} + +// WebRtcAecm_CalcStepSize(...) 
+//
+// This function calculates the step size used in channel estimation
+//
+//
+// @param aecm [in] Handle of the AECM instance.
+// @param mu [out] (Return value) Stepsize in log2(), i.e. number of
+// shifts.
+//
+//
+int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm) {
+  int32_t tmp32;
+  int16_t tmp16;
+  int16_t mu = MU_MAX;
+
+  // Here we calculate the step size mu used in the
+  // following NLMS based Channel estimation algorithm
+  if (!aecm->currentVADValue) {
+    // Far end energy level too low, no channel update
+    mu = 0;
+  } else if (aecm->startupState > 0) {
+    if (aecm->farEnergyMin >= aecm->farEnergyMax) {
+      mu = MU_MIN;
+    } else {
+      tmp16 = (aecm->farLogEnergy - aecm->farEnergyMin);
+      tmp32 = tmp16 * MU_DIFF;
+      tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin);
+      mu = MU_MIN - 1 - (int16_t)(tmp32);
+      // The -1 is an alternative to rounding. This way we get a larger
+      // stepsize, so we in some sense compensate for truncation in NLMS
+    }
+    if (mu < MU_MAX) {
+      mu = MU_MAX;  // Equivalent with maximum step size of 2^-MU_MAX
+    }
+  }
+
+  return mu;
+}
+
+// WebRtcAecm_UpdateChannel(...)
+//
+// This function performs channel estimation. NLMS and decision on channel
+// storage.
+//
+//
+// @param aecm [i/o] Handle of the AECM instance.
+// @param far_spectrum [in] Absolute value of the farend signal in Q(far_q)
+// @param far_q [in] Q-domain of the farend signal
+// @param dfa [in] Absolute value of the nearend signal
+// (Q[aecm->dfaQDomain])
+// @param mu [in] NLMS step size.
+// @param echoEst [i/o] Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
+//
+void WebRtcAecm_UpdateChannel(AecmCore* aecm,
+                              const uint16_t* far_spectrum,
+                              const int16_t far_q,
+                              const uint16_t* const dfa,
+                              const int16_t mu,
+                              int32_t* echoEst) {
+  uint32_t tmpU32no1, tmpU32no2;
+  int32_t tmp32no1, tmp32no2;
+  int32_t mseStored;
+  int32_t mseAdapt;
+
+  int i;
+
+  int16_t zerosFar, zerosNum, zerosCh, zerosDfa;
+  int16_t shiftChFar, shiftNum, shift2ResChan;
+  int16_t tmp16no1;
+  int16_t xfaQ, dfaQ;
+
+  // This is the channel estimation algorithm. It is based on NLMS but has a
+  // variable step length, which was calculated above.
+  if (mu) {
+    for (i = 0; i < PART_LEN1; i++) {
+      // Determine norm of channel and farend to make sure we don't get
+      // overflow in multiplication
+      zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]);
+      zerosFar = WebRtcSpl_NormU32((uint32_t)far_spectrum[i]);
+      if (zerosCh + zerosFar > 31) {
+        // Multiplication is safe
+        tmpU32no1 =
+            WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i], far_spectrum[i]);
+        shiftChFar = 0;
+      } else {
+        // We need to shift down before multiplication
+        shiftChFar = 32 - zerosCh - zerosFar;
+        // If zerosCh == zerosFar == 0, shiftChFar is 32. A
+        // right shift of 32 is undefined. To avoid that, we
+        // do this check.
+        tmpU32no1 =
+            rtc::dchecked_cast<uint32_t>(
+                shiftChFar >= 32 ?
0 : aecm->channelAdapt32[i] >> shiftChFar) * + far_spectrum[i]; + } + // Determine Q-domain of numerator + zerosNum = WebRtcSpl_NormU32(tmpU32no1); + if (dfa[i]) { + zerosDfa = WebRtcSpl_NormU32((uint32_t)dfa[i]); + } else { + zerosDfa = 32; + } + tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain - RESOLUTION_CHANNEL32 - + far_q + shiftChFar; + if (zerosNum > tmp16no1 + 1) { + xfaQ = tmp16no1; + dfaQ = zerosDfa - 2; + } else { + xfaQ = zerosNum - 2; + dfaQ = RESOLUTION_CHANNEL32 + far_q - aecm->dfaNoisyQDomain - + shiftChFar + xfaQ; + } + // Add in the same Q-domain + tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ); + tmpU32no2 = WEBRTC_SPL_SHIFT_W32((uint32_t)dfa[i], dfaQ); + tmp32no1 = (int32_t)tmpU32no2 - (int32_t)tmpU32no1; + zerosNum = WebRtcSpl_NormW32(tmp32no1); + if ((tmp32no1) && (far_spectrum[i] > (CHANNEL_VAD << far_q))) { + // + // Update is needed + // + // This is what we would like to compute + // + // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * far_spectrum[i]) + // tmp32norm = (i + 1) + // aecm->channelAdapt[i] += (2^mu) * tmp32no1 + // / (tmp32norm * far_spectrum[i]) + // + + // Make sure we don't get overflow in multiplication. + if (zerosNum + zerosFar > 31) { + if (tmp32no1 > 0) { + tmp32no2 = + (int32_t)WEBRTC_SPL_UMUL_32_16(tmp32no1, far_spectrum[i]); + } else { + tmp32no2 = + -(int32_t)WEBRTC_SPL_UMUL_32_16(-tmp32no1, far_spectrum[i]); + } + shiftNum = 0; + } else { + shiftNum = 32 - (zerosNum + zerosFar); + if (tmp32no1 > 0) { + tmp32no2 = (tmp32no1 >> shiftNum) * far_spectrum[i]; + } else { + tmp32no2 = -((-tmp32no1 >> shiftNum) * far_spectrum[i]); + } + } + // Normalize with respect to frequency bin + tmp32no2 = WebRtcSpl_DivW32W16(tmp32no2, i + 1); + // Make sure we are in the right Q-domain + shift2ResChan = + shiftNum + shiftChFar - xfaQ - mu - ((30 - zerosFar) << 1); + if (WebRtcSpl_NormW32(tmp32no2) < shift2ResChan) { + tmp32no2 = WEBRTC_SPL_WORD32_MAX; + } else { + tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan); + } + aecm->channelAdapt32[i] = + WebRtcSpl_AddSatW32(aecm->channelAdapt32[i], tmp32no2); + if (aecm->channelAdapt32[i] < 0) { + // We can never have negative channel gain + aecm->channelAdapt32[i] = 0; + } + aecm->channelAdapt16[i] = (int16_t)(aecm->channelAdapt32[i] >> 16); + } + } + } + // END: Adaptive channel update + + // Determine if we should store or restore the channel + if ((aecm->startupState == 0) & (aecm->currentVADValue)) { + // During startup we store the channel every block, + // and we recalculate echo estimate + WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst); + } else { + if (aecm->farLogEnergy < aecm->farEnergyMSE) { + aecm->mseChannelCount = 0; + } else { + aecm->mseChannelCount++; + } + // Enough data for validation. Store channel if we can. + if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10)) { + // We have enough data. + // Calculate MSE of "Adapt" and "Stored" versions. + // It is actually not MSE, but average absolute error. 
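+      // Sketch of the metric accumulated below, in Q8 log-energy terms:
+      //   mseStored = sum_i |echoStoredLogEnergy[i] - nearLogEnergy[i]|
+      //   mseAdapt  = sum_i |echoAdaptLogEnergy[i]  - nearLogEnergy[i]|
+      // with i running over the last MIN_MSE_COUNT blocks.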
+ mseStored = 0; + mseAdapt = 0; + for (i = 0; i < MIN_MSE_COUNT; i++) { + tmp32no1 = ((int32_t)aecm->echoStoredLogEnergy[i] - + (int32_t)aecm->nearLogEnergy[i]); + tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); + mseStored += tmp32no2; + + tmp32no1 = ((int32_t)aecm->echoAdaptLogEnergy[i] - + (int32_t)aecm->nearLogEnergy[i]); + tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); + mseAdapt += tmp32no2; + } + if (((mseStored << MSE_RESOLUTION) < (MIN_MSE_DIFF * mseAdapt)) & + ((aecm->mseStoredOld << MSE_RESOLUTION) < + (MIN_MSE_DIFF * aecm->mseAdaptOld))) { + // The stored channel has a significantly lower MSE than the adaptive + // one for two consecutive calculations. Reset the adaptive channel. + WebRtcAecm_ResetAdaptiveChannel(aecm); + } else if (((MIN_MSE_DIFF * mseStored) > (mseAdapt << MSE_RESOLUTION)) & + (mseAdapt < aecm->mseThreshold) & + (aecm->mseAdaptOld < aecm->mseThreshold)) { + // The adaptive channel has a significantly lower MSE than the stored + // one. The MSE for the adaptive channel has also been low for two + // consecutive calculations. Store the adaptive channel. + WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst); + + // Update threshold + if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX) { + aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld); + } else { + int scaled_threshold = aecm->mseThreshold * 5 / 8; + aecm->mseThreshold += ((mseAdapt - scaled_threshold) * 205) >> 8; + } + } + + // Reset counter + aecm->mseChannelCount = 0; + + // Store the MSE values. + aecm->mseStoredOld = mseStored; + aecm->mseAdaptOld = mseAdapt; + } + } + // END: Determine if we should store or reset channel estimate. +} + +// CalcSuppressionGain(...) +// +// This function calculates the suppression gain that is used in the Wiener +// filter. +// +// +// @param aecm [i/n] Handle of the AECM instance. +// @param supGain [out] (Return value) Suppression gain with which to scale +// the noise +// level (Q14). +// +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm) { + int32_t tmp32no1; + + int16_t supGain = SUPGAIN_DEFAULT; + int16_t tmp16no1; + int16_t dE = 0; + + // Determine suppression gain used in the Wiener filter. The gain is based on + // a mix of far end energy and echo estimation error. Adjust for the far end + // signal level. A low signal level indicates no far end signal, hence we set + // the suppression gain to 0 + if (!aecm->currentVADValue) { + supGain = 0; + } else { + // Adjust for possible double talk. If we have large variations in + // estimation error we likely have double talk (or poor channel). + tmp16no1 = (aecm->nearLogEnergy[0] - aecm->echoStoredLogEnergy[0] - + ENERGY_DEV_OFFSET); + dE = WEBRTC_SPL_ABS_W16(tmp16no1); + + if (dE < ENERGY_DEV_TOL) { + // Likely no double talk. The better estimation, the more we can suppress + // signal. Update counters + if (dE < SUPGAIN_EPC_DT) { + tmp32no1 = aecm->supGainErrParamDiffAB * dE; + tmp32no1 += (SUPGAIN_EPC_DT >> 1); + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT); + supGain = aecm->supGainErrParamA - tmp16no1; + } else { + tmp32no1 = aecm->supGainErrParamDiffBD * (ENERGY_DEV_TOL - dE); + tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1); + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16( + tmp32no1, (ENERGY_DEV_TOL - SUPGAIN_EPC_DT)); + supGain = aecm->supGainErrParamD + tmp16no1; + } + } else { + // Likely in double talk. 
Use default value + supGain = aecm->supGainErrParamD; + } + } + + if (supGain > aecm->supGainOld) { + tmp16no1 = supGain; + } else { + tmp16no1 = aecm->supGainOld; + } + aecm->supGainOld = supGain; + if (tmp16no1 < aecm->supGain) { + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); + } else { + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); + } + + // END: Update suppression gain + + return aecm->supGain; +} + +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, + const int farLen) { + int writeLen = farLen, writePos = 0; + + // Check if the write position must be wrapped + while (aecm->farBufWritePos + writeLen > FAR_BUF_LEN) { + // Write to remaining buffer space before wrapping + writeLen = FAR_BUF_LEN - aecm->farBufWritePos; + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(int16_t) * writeLen); + aecm->farBufWritePos = 0; + writePos = writeLen; + writeLen = farLen - writeLen; + } + + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(int16_t) * writeLen); + aecm->farBufWritePos += writeLen; +} + +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + const int farLen, + const int knownDelay) { + int readLen = farLen; + int readPos = 0; + int delayChange = knownDelay - aecm->lastKnownDelay; + + aecm->farBufReadPos -= delayChange; + + // Check if delay forces a read position wrap + while (aecm->farBufReadPos < 0) { + aecm->farBufReadPos += FAR_BUF_LEN; + } + while (aecm->farBufReadPos > FAR_BUF_LEN - 1) { + aecm->farBufReadPos -= FAR_BUF_LEN; + } + + aecm->lastKnownDelay = knownDelay; + + // Check if read position must be wrapped + while (aecm->farBufReadPos + readLen > FAR_BUF_LEN) { + // Read from remaining buffer space before wrapping + readLen = FAR_BUF_LEN - aecm->farBufReadPos; + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(int16_t) * readLen); + aecm->farBufReadPos = 0; + readPos = readLen; + readLen = farLen - readLen; + } + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(int16_t) * readLen); + aecm->farBufReadPos += readLen; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.h b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.h new file mode 100644 index 0000000000..3de49315c4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.h @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs echo control (suppression) with fft routines in fixed-point. 
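+//
+// Rough processing outline (summary sketch of aecm_core*.cc): FRAME_LEN
+// frames are split into PART_LEN blocks, each block is transformed with a
+// real FFT, a delay-aligned far-end spectrum drives an NLMS-style channel
+// estimate, and a Wiener-type suppression gain plus optional comfort noise
+// shapes each output block.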
+ +#ifndef MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ +#define MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ + +extern "C" { +#include "common_audio/ring_buffer.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +} +#include "modules/audio_processing/aecm/aecm_defines.h" + +struct RealFFT; + +namespace webrtc { + +#ifdef _MSC_VER // visual c++ +#define ALIGN8_BEG __declspec(align(8)) +#define ALIGN8_END +#else // gcc or icc +#define ALIGN8_BEG +#define ALIGN8_END __attribute__((aligned(8))) +#endif + +typedef struct { + int16_t real; + int16_t imag; +} ComplexInt16; + +typedef struct { + int farBufWritePos; + int farBufReadPos; + int knownDelay; + int lastKnownDelay; + int firstVAD; // Parameter to control poorly initialized channels + + RingBuffer* farFrameBuf; + RingBuffer* nearNoisyFrameBuf; + RingBuffer* nearCleanFrameBuf; + RingBuffer* outFrameBuf; + + int16_t farBuf[FAR_BUF_LEN]; + + int16_t mult; + uint32_t seed; + + // Delay estimation variables + void* delay_estimator_farend; + void* delay_estimator; + uint16_t currentDelay; + // Far end history variables + // TODO(bjornv): Replace `far_history` with ring_buffer. + uint16_t far_history[PART_LEN1 * MAX_DELAY]; + int far_history_pos; + int far_q_domains[MAX_DELAY]; + + int16_t nlpFlag; + int16_t fixedDelay; + + uint32_t totCount; + + int16_t dfaCleanQDomain; + int16_t dfaCleanQDomainOld; + int16_t dfaNoisyQDomain; + int16_t dfaNoisyQDomainOld; + + int16_t nearLogEnergy[MAX_BUF_LEN]; + int16_t farLogEnergy; + int16_t echoAdaptLogEnergy[MAX_BUF_LEN]; + int16_t echoStoredLogEnergy[MAX_BUF_LEN]; + + // The extra 16 or 32 bytes in the following buffers are for alignment based + // Neon code. + // It's designed this way since the current GCC compiler can't align a + // buffer in 16 or 32 byte boundaries properly. + int16_t channelStored_buf[PART_LEN1 + 8]; + int16_t channelAdapt16_buf[PART_LEN1 + 8]; + int32_t channelAdapt32_buf[PART_LEN1 + 8]; + int16_t xBuf_buf[PART_LEN2 + 16]; // farend + int16_t dBufClean_buf[PART_LEN2 + 16]; // nearend + int16_t dBufNoisy_buf[PART_LEN2 + 16]; // nearend + int16_t outBuf_buf[PART_LEN + 8]; + + // Pointers to the above buffers + int16_t* channelStored; + int16_t* channelAdapt16; + int32_t* channelAdapt32; + int16_t* xBuf; + int16_t* dBufClean; + int16_t* dBufNoisy; + int16_t* outBuf; + + int32_t echoFilt[PART_LEN1]; + int16_t nearFilt[PART_LEN1]; + int32_t noiseEst[PART_LEN1]; + int noiseEstTooLowCtr[PART_LEN1]; + int noiseEstTooHighCtr[PART_LEN1]; + int16_t noiseEstCtr; + int16_t cngMode; + + int32_t mseAdaptOld; + int32_t mseStoredOld; + int32_t mseThreshold; + + int16_t farEnergyMin; + int16_t farEnergyMax; + int16_t farEnergyMaxMin; + int16_t farEnergyVAD; + int16_t farEnergyMSE; + int currentVADValue; + int16_t vadUpdateCount; + + int16_t startupState; + int16_t mseChannelCount; + int16_t supGain; + int16_t supGainOld; + + int16_t supGainErrParamA; + int16_t supGainErrParamD; + int16_t supGainErrParamDiffAB; + int16_t supGainErrParamDiffBD; + + struct RealFFT* real_fft; + +#ifdef AEC_DEBUG + FILE* farFile; + FILE* nearFile; + FILE* outFile; +#endif +} AecmCore; + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CreateCore() +// +// Allocates the memory needed by the AECM. The memory needs to be +// initialized separately using the WebRtcAecm_InitCore() function. +// Returns a pointer to the instance and a nullptr at failure. 
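+// Illustrative call sequence (hypothetical caller; `far`, `near` and `out`
+// are assumed int16_t buffers of FRAME_LEN samples, not defined here):
+//
+//   AecmCore* core = WebRtcAecm_CreateCore();
+//   if (core != NULL && WebRtcAecm_InitCore(core, 16000) == 0) {
+//     WebRtcAecm_ProcessFrame(core, far, near, NULL, out);  // NULL: no NS.
+//   }
+//   WebRtcAecm_FreeCore(core);  // Safe to call even after a failed init.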
+AecmCore* WebRtcAecm_CreateCore();
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitCore(...)
+//
+// This function initializes the AECM instance created with
+// WebRtcAecm_CreateCore()
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//      - samplingFreq  : Sampling Frequency
+//
+// Output:
+//      - aecm          : Initialized instance
+//
+// Return value         :  0 - Ok
+//                        -1 - Error
+//
+int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_FreeCore(...)
+//
+// This function releases the memory allocated by WebRtcAecm_CreateCore()
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//
+void WebRtcAecm_FreeCore(AecmCore* aecm);
+
+int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_InitEchoPathCore(...)
+//
+// This function resets the echo channel adaptation with the specified channel.
+// Input:
+//      - aecm          : Pointer to the AECM instance
+//      - echo_path     : Pointer to the data that should initialize the echo
+//                        path
+//
+// Output:
+//      - aecm          : Initialized instance
+//
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessFrame(...)
+//
+// This function processes frames and sends blocks to
+// WebRtcAecm_ProcessBlock(...)
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one frame of echo signal
+//      - nearendNoisy  : In buffer containing one frame of nearend+echo signal
+//                        without NS
+//      - nearendClean  : In buffer containing one frame of nearend+echo signal
+//                        with NS
+//
+// Output:
+//      - out           : Out buffer, one frame of nearend signal
+//
+//
+int WebRtcAecm_ProcessFrame(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_ProcessBlock(...)
+//
+// This function is called for every block within one frame
+// This function is called by WebRtcAecm_ProcessFrame(...)
+//
+// Inputs:
+//      - aecm          : Pointer to the AECM instance
+//      - farend        : In buffer containing one block of echo signal
+//      - nearendNoisy  : In buffer containing one frame of nearend+echo signal
+//                        without NS
+//      - nearendClean  : In buffer containing one frame of nearend+echo signal
+//                        with NS
+//
+// Output:
+//      - out           : Out buffer, one block of nearend signal
+//
+//
+int WebRtcAecm_ProcessBlock(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* noisyClean,
+                            int16_t* out);
+
+////////////////////////////////////////////////////////////////////////////////
+// WebRtcAecm_BufferFarFrame()
+//
+// Inserts a frame of data into farend buffer.
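+// The write position wraps modulo FAR_BUF_LEN: a write that would run past
+// the end of `farBuf` is split into a tail write followed by a wrapped write
+// at the start of the buffer (see the while loop in the implementation).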
+// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, + int farLen); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_FetchFarFrame() +// +// Read the farend buffer to account for known delay +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// - knownDelay : known delay +// +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + int farLen, + int knownDelay); + +// All the functions below are intended to be private + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateFarHistory() +// +// Moves the pointer to the next entry and inserts `far_spectrum` and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_AlignedFarend() +// +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcSuppressionGain() +// +// This function calculates the suppression gain that is used in the +// Wiener filter. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// +// Return value: +// - supGain : Suppression gain with which to scale the noise +// level (Q14). +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcEnergies() +// +// This function calculates the log of energies for nearend, farend and +// estimated echoes. There is also an update of energy decision levels, +// i.e. internal VAD. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Pointer to farend spectrum. +// - far_q : Q-domain of farend spectrum. +// - nearEner : Near end energy for current block in +// Q(aecm->dfaQDomain). +// +// Output: +// - echoEst : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + int16_t far_q, + uint32_t nearEner, + int32_t* echoEst); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcStepSize() +// +// This function calculates the step size used in channel estimation +// +// Inputs: +// - aecm : Pointer to the AECM instance. 
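+//
+// Note (illustrative): `mu` is a shift count, so the effective NLMS step
+// size is 2^-mu; a returned 0 makes WebRtcAecm_UpdateChannel() skip the
+// channel update for blocks without far-end activity.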
+// +// Return value: +// - mu : Stepsize in log2(), i.e. number of shifts. +// +int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateChannel(...) +// +// This function performs channel estimation. +// NLMS and decision on channel storage. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Absolute value of the farend signal in Q(far_q) +// - far_q : Q-domain of the farend signal +// - dfa : Absolute value of the nearend signal +// (Q[aecm->dfaQDomain]) +// - mu : NLMS step size. +// Input/Output: +// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_UpdateChannel(AecmCore* aecm, + const uint16_t* far_spectrum, + int16_t far_q, + const uint16_t* const dfa, + int16_t mu, + int32_t* echoEst); + +extern const int16_t WebRtcAecm_kCosTable[]; +extern const int16_t WebRtcAecm_kSinTable[]; + +/////////////////////////////////////////////////////////////////////////////// +// Some function pointers, for internal functions shared by ARM NEON and +// generic C code. +// +typedef void (*CalcLinearEnergies)(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echoEst, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); +extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies; + +typedef void (*StoreAdaptiveChannel)(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); +extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel; + +typedef void (*ResetAdaptiveChannel)(AecmCore* aecm); +extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel; + +// For the above function pointers, functions for generic platforms are declared +// and defined as static in file aecm_core.c, while those for ARM Neon platforms +// are declared below and defined in file aecm_core_neon.c. +#if defined(WEBRTC_HAS_NEON) +void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); + +void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); + +void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm); +#endif + +#if defined(MIPS32_LE) +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm); +#endif +#endif + +} // namespace webrtc + +#endif diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc new file mode 100644 index 0000000000..d363dd2cfd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc @@ -0,0 +1,671 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "modules/audio_processing/aecm/aecm_core.h"
+
+extern "C" {
+#include "common_audio/ring_buffer.h"
+#include "common_audio/signal_processing/include/real_fft.h"
+}
+#include "modules/audio_processing/aecm/echo_control_mobile.h"
+#include "modules/audio_processing/utility/delay_estimator_wrapper.h"
+extern "C" {
+#include "system_wrappers/include/cpu_features_wrapper.h"
+}
+
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/sanitizer.h"
+
+namespace webrtc {
+
+namespace {
+
+// Square root of Hanning window in Q14.
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+    0,     399,   798,   1196,  1594,  1990,  2386,  2780,  3172,  3562,
+    3951,  4337,  4720,  5101,  5478,  5853,  6224,  6591,  6954,  7313,
+    7668,  8019,  8364,  8705,  9040,  9370,  9695,  10013, 10326, 10633,
+    10933, 11227, 11514, 11795, 12068, 12335, 12594, 12845, 13089, 13325,
+    13553, 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, 15231,
+    15373, 15506, 15631, 15746, 15851, 15947, 16034, 16111, 16179, 16237,
+    16286, 16325, 16354, 16373, 16384};
+
+#ifdef AECM_WITH_ABS_APPROX
+// Q15 alpha = 0.99439986968132 const Factor for magnitude approximation
+static const uint16_t kAlpha1 = 32584;
+// Q15 beta = 0.12967166976970 const Factor for magnitude approximation
+static const uint16_t kBeta1 = 4249;
+// Q15 alpha = 0.94234827210087 const Factor for magnitude approximation
+static const uint16_t kAlpha2 = 30879;
+// Q15 beta = 0.33787806009150 const Factor for magnitude approximation
+static const uint16_t kBeta2 = 11072;
+// Q15 alpha = 0.82247698684306 const Factor for magnitude approximation
+static const uint16_t kAlpha3 = 26951;
+// Q15 beta = 0.57762063060713 const Factor for magnitude approximation
+static const uint16_t kBeta3 = 18927;
+#endif
+
+static const int16_t kNoiseEstQDomain = 15;
+static const int16_t kNoiseEstIncCount = 5;
+
+static void ComfortNoise(AecmCore* aecm,
+                         const uint16_t* dfa,
+                         ComplexInt16* out,
+                         const int16_t* lambda) {
+  int16_t i;
+  int16_t tmp16;
+  int32_t tmp32;
+
+  int16_t randW16[PART_LEN];
+  int16_t uReal[PART_LEN1];
+  int16_t uImag[PART_LEN1];
+  int32_t outLShift32;
+  int16_t noiseRShift16[PART_LEN1];
+
+  int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
+  int16_t minTrackShift;
+
+  RTC_DCHECK_GE(shiftFromNearToNoise, 0);
+  RTC_DCHECK_LT(shiftFromNearToNoise, 16);
+
+  if (aecm->noiseEstCtr < 100) {
+    // Track the minimum more quickly initially.
+    aecm->noiseEstCtr++;
+    minTrackShift = 6;
+  } else {
+    minTrackShift = 9;
+  }
+
+  // Estimate noise power.
+  for (i = 0; i < PART_LEN1; i++) {
+    // Shift to the noise domain.
+    tmp32 = (int32_t)dfa[i];
+    outLShift32 = tmp32 << shiftFromNearToNoise;
+
+    if (outLShift32 < aecm->noiseEst[i]) {
+      // Reset "too low" counter
+      aecm->noiseEstTooLowCtr[i] = 0;
+      // Track the minimum.
+      if (aecm->noiseEst[i] < (1 << minTrackShift)) {
+        // For small values, decrease noiseEst[i] every
+        // `kNoiseEstIncCount` block. The regular approach below can not
+        // go further down due to truncation.
+        aecm->noiseEstTooHighCtr[i]++;
+        if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {
+          aecm->noiseEst[i]--;
+          aecm->noiseEstTooHighCtr[i] = 0;  // Reset the counter
+        }
+      } else {
+        aecm->noiseEst[i] -=
+            ((aecm->noiseEst[i] - outLShift32) >> minTrackShift);
+      }
+    } else {
+      // Reset "too high" counter
+      aecm->noiseEstTooHighCtr[i] = 0;
+      // Ramp slowly upwards until we hit the minimum again.
+      if ((aecm->noiseEst[i] >> 19) > 0) {
+        // Avoid overflow.
+ // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + aecm->noiseEst[i] >>= 11; + aecm->noiseEst[i] *= 2049; + } else if ((aecm->noiseEst[i] >> 11) > 0) { + // Large enough for relative increase + aecm->noiseEst[i] *= 2049; + aecm->noiseEst[i] >>= 11; + } else { + // Make incremental increases based on size every + // `kNoiseEstIncCount` block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { + aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1; + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } + } + + for (i = 0; i < PART_LEN1; i++) { + tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise; + if (tmp32 > 32767) { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + noiseRShift16[i] = (int16_t)tmp32; + + tmp16 = ONE_Q14 - lambda[i]; + noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14); + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + + // Generate noise according to estimated energy. + uReal[0] = 0; // Reject LF noise. + uImag[0] = 0; + for (i = 1; i < PART_LEN1; i++) { + // Get a random index for the cos and sin tables over [0 359]. + tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15); + + // Tables are in Q13. + uReal[i] = + (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >> 13); + uImag[i] = + (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >> 13); + } + uImag[PART_LEN] = 0; + + for (i = 0; i < PART_LEN1; i++) { + out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]); + out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]); + } +} + +static void WindowAndFFT(AecmCore* aecm, + int16_t* fft, + const int16_t* time_signal, + ComplexInt16* freq_signal, + int time_signal_scaling) { + int i = 0; + + // FFT of signal + for (i = 0; i < PART_LEN; i++) { + // Window time domain signal and insert into real part of + // transformation array `fft` + int16_t scaled_time_signal = time_signal[i] * (1 << time_signal_scaling); + fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14); + scaled_time_signal = time_signal[i + PART_LEN] * (1 << time_signal_scaling); + fft[PART_LEN + i] = (int16_t)( + (scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14); + } + + // Do forward FFT, then take only the first PART_LEN complex samples, + // and change signs of the imaginary parts. + WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal); + for (i = 0; i < PART_LEN; i++) { + freq_signal[i].imag = -freq_signal[i].imag; + } +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, j, outCFFT; + int32_t tmp32no1; + // Reuse `efw` for the inverse FFT output after transferring + // the contents to `fft`. + int16_t* ifft_out = (int16_t*)efw; + + // Synthesis + for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) { + fft[j] = efw[i].real; + fft[j + 1] = -efw[i].imag; + } + fft[0] = efw[0].real; + fft[1] = -efw[0].imag; + + fft[PART_LEN2] = efw[PART_LEN].real; + fft[PART_LEN2 + 1] = -efw[PART_LEN].imag; + + // Inverse FFT. Keep outCFFT to scale the samples in the next block. 
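+  // (Fixed-point bookkeeping sketch: the returned `outCFFT` describes the
+  // scaling applied internally by the inverse FFT, and the shifts below
+  // combine it with aecm->dfaCleanQDomain to bring the samples back to the
+  // output Q-domain.)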
+ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out); + for (i = 0; i < PART_LEN; i++) { + ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14); + tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i], + outCFFT - aecm->dfaCleanQDomain); + output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1 + aecm->outBuf[i], + WEBRTC_SPL_WORD16_MIN); + + tmp32no1 = + (ifft_out[PART_LEN + i] * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14; + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, outCFFT - aecm->dfaCleanQDomain); + aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1, + WEBRTC_SPL_WORD16_MIN); + } + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. +// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + + // In fft_buf, +16 for 32-byte alignment. 
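+  // [Editor's note, illustrative only: the 16 spare int16_t elements (32
+  // bytes) leave room to round the buffer address up to the next 32-byte
+  // boundary with
+  //   fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31);
+  // which clears the low five address bits, so the FFT scratch buffer is
+  // 32-byte aligned wherever fft_buf happens to land on the stack.]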
+ int16_t fft_buf[PART_LEN4 + 16]; + int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31); + + int16_t tmp16no1; +#ifndef WEBRTC_ARCH_ARM_V7 + int16_t tmp16no2; +#endif +#ifdef AECM_WITH_ABS_APPROX + int16_t max_value = 0; + int16_t min_value = 0; + uint16_t alpha = 0; + uint16_t beta = 0; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, calculate the magnitude for + // all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = + (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = + (uint32_t)(freq_signal_abs[0]) + (uint32_t)(freq_signal_abs[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) { + if (freq_signal[i].real == 0) { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + } else if (freq_signal[i].imag == 0) { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real); + } else { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(`imag`,`real`) + beta * min(`imag`,`real`) + // + // The parameters alpha and beta are stored in Q15 + +#ifdef AECM_WITH_ABS_APPROX + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + + if (tmp16no1 > tmp16no2) { + max_value = tmp16no1; + min_value = tmp16no2; + } else { + max_value = tmp16no2; + min_value = tmp16no1; + } + + // Magnitude in Q(-6) + if ((max_value >> 2) > min_value) { + alpha = kAlpha1; + beta = kBeta1; + } else if ((max_value >> 1) > min_value) { + alpha = kAlpha2; + beta = kBeta2; + } else { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (int16_t)((max_value * alpha) >> 15); + tmp16no2 = (int16_t)((min_value * beta) >> 15); + freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2; +#else +#ifdef WEBRTC_ARCH_ARM_V7 + __asm __volatile( + "smulbb %[tmp32no1], %[real], %[real]\n\t" + "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t" + : [tmp32no1] "+&r"(tmp32no1), [tmp32no2] "=r"(tmp32no2) + : [real] "r"(freq_signal[i].real), [imag] "r"(freq_signal[i].imag)); +#else + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); +#endif // WEBRTC_ARCH_ARM_V7 + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; +#endif // AECM_WITH_ABS_APPROX + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } + + return time_signal_scaling; +} + +} // namespace + +int RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/8200 + WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + // TODO(kma): define fft with ComplexInt16. 
+ int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. + int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31); + int32_t* echoEst32 = (int32_t*)(((uintptr_t)echoEst32_buf + 31) & ~31); + ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int16_t nlpGain = ONE_Q14; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) { + aecm->startupState = + (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean + PART_LEN, nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, aecm->xBuf, dfw, xfa, &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = + TimeToFrequencyDomain(aecm, aecm->dBufNoisy, dfw, dfaNoisy, &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + if (nearendClean == NULL) { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, aecm->dBufClean, dfw, dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, dfaNoisy, + PART_LEN1, zerosDBufNoisy); + if (delay == -1) { + return -1; + } else if (delay == -2) { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. + delay = 0; + } + + if (aecm->fixedDelay >= 0) { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t)far_q; + if (far_spectrum_ptr == NULL) { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum, + echoEst32); + + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. 
+  // It is based on NLMS but has a variable step length,
+  // which was calculated above.
+  WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu,
+                           echoEst32);
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++) {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] +=
+        rtc::dchecked_cast<int32_t>((int64_t{tmp32no1} * 50) >> 8);
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16) {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
+      //   aecm->xfaQDomainBuf[diff])
+      echoEst32Gained =
+          WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff =
+          14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1) {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                                supGain >> tmp16no1);
+      } else {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    RTC_DCHECK_GE(zeros16, 0);  // `zeros16` is a norm, hence non-negative.
+    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+      tmp16no1 = aecm->nearFilt[i] * (1 << zeros16);
+      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+    } else {
+      tmp16no1 = dfa_clean_q_domain_diff < 0
+                     ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+                     : aecm->nearFilt[i] * (1 << dfa_clean_q_domain_diff);
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)(tmp32no1 >> 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 * (1 << -qDomainDiff)
+                                          : tmp16no2 >> qDomainDiff;
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0) {
+      hnl[i] = ONE_Q14;
+    } else if (aecm->nearFilt[i] == 0) {
+      hnl[i] = 0;
+    } else {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 =
+          WebRtcSpl_DivU32U16(echoEst32Gained, (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14) {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0) {
+        hnl[i] = ONE_Q14;
+      } else {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] < 0) {
+          hnl[i] = 0;
+        }
+      }
+    }
+    if (hnl[i]) {
+      numPosCoef++;
+    }
+  }
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2) {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    // speech distortion in double-talk.
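+    // [Editor's note, not upstream text: squaring a Q14 gain and shifting
+    // right by 14 keeps the result in Q14, so the loop below computes
+    // hnl[i]^2 for a gain in [0, 1], deepening suppression in the upper
+    // band.]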
+ for (i = 0; i < PART_LEN1; i++) { + hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { + avgHnl32 += (int32_t)hnl[i]; + } + RTC_DCHECK_GT(kMaxPrefBand - kMinPrefBand + 1, 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) { + if (hnl[i] > (int16_t)avgHnl32) { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) { + for (i = 0; i < PART_LEN1; i++) { + // Truncate values close to zero and one. + if (hnl[i] > NLP_COMP_HIGH) { + hnl[i] = ONE_Q14; + } else if (hnl[i] < NLP_COMP_LOW) { + hnl[i] = 0; + } + + // Remove outliers + if (numPosCoef < 3) { + nlpGain = 0; + } else { + nlpGain = ONE_Q14; + } + + // NLP + if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) { + hnl[i] = ONE_Q14; + } else { + hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14); + } + + // multiply with Wiener coefficients + efw[i].real = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, hnl[i], 14)); + efw[i].imag = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, hnl[i], 14)); + } + } else { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, hnl[i], 14)); + efw[i].imag = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, hnl[i], 14)); + } + } + + if (aecm->cngMode == AecmTrue) { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build new file mode 100644 index 0000000000..f0e41cd6bd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_gn/moz.build @@ -0,0 +1,293 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core.cc", + "/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + 
DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc" + ] + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc" + ] + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc" + ] + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "ppc64": + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "riscv64": + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + + SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_c.cc" + ] + +Library("aecm_core_gn") diff --git 
a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc new file mode 100644 index 0000000000..828aa6d2fb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_mips.cc @@ -0,0 +1,1656 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aecm/aecm_core.h" +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { + +namespace { + +static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, 3562, 3951, + 4337, 4720, 5101, 5478, 5853, 6224, 6591, 6954, 7313, 7668, 8019, + 8364, 8705, 9040, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189, + 14384, 14571, 14749, 14918, 15079, 15231, 15373, 15506, 15631, 15746, 15851, + 15947, 16034, 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384}; + +static const int16_t kNoiseEstQDomain = 15; +static const int16_t kNoiseEstIncCount = 5; + +static int16_t coefTable[] = { + 0, 4, 256, 260, 128, 132, 384, 388, 64, 68, 320, 324, 192, 196, 448, + 452, 32, 36, 288, 292, 160, 164, 416, 420, 96, 100, 352, 356, 224, 228, + 480, 484, 16, 20, 272, 276, 144, 148, 400, 404, 80, 84, 336, 340, 208, + 212, 464, 468, 48, 52, 304, 308, 176, 180, 432, 436, 112, 116, 368, 372, + 240, 244, 496, 500, 8, 12, 264, 268, 136, 140, 392, 396, 72, 76, 328, + 332, 200, 204, 456, 460, 40, 44, 296, 300, 168, 172, 424, 428, 104, 108, + 360, 364, 232, 236, 488, 492, 24, 28, 280, 284, 152, 156, 408, 412, 88, + 92, 344, 348, 216, 220, 472, 476, 56, 60, 312, 316, 184, 188, 440, 444, + 120, 124, 376, 380, 248, 252, 504, 508}; + +static int16_t coefTable_ifft[] = { + 0, 512, 256, 508, 128, 252, 384, 380, 64, 124, 320, 444, 192, 188, 448, + 316, 32, 60, 288, 476, 160, 220, 416, 348, 96, 92, 352, 412, 224, 156, + 480, 284, 16, 28, 272, 492, 144, 236, 400, 364, 80, 108, 336, 428, 208, + 172, 464, 300, 48, 44, 304, 460, 176, 204, 432, 332, 112, 76, 368, 396, + 240, 140, 496, 268, 8, 12, 264, 500, 136, 244, 392, 372, 72, 116, 328, + 436, 200, 180, 456, 308, 40, 52, 296, 468, 168, 212, 424, 340, 104, 84, + 360, 404, 232, 148, 488, 276, 24, 20, 280, 484, 152, 228, 408, 356, 88, + 100, 344, 420, 216, 164, 472, 292, 56, 36, 312, 452, 184, 196, 440, 324, + 120, 68, 376, 388, 248, 132, 504, 260}; + +} // namespace + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda); + +static void WindowAndFFT(AecmCore* aecm, + int16_t* fft, + const int16_t* time_signal, + ComplexInt16* freq_signal, + int time_signal_scaling) { + int i, j; + int32_t tmp1, tmp2, tmp3, tmp4; + int16_t* pfrfi; + ComplexInt16* pfreq_signal; + int16_t f_coef, s_coef; + int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1; + int32_t hann, hann1, coefs; + + memset(fft, 0, sizeof(int16_t) * PART_LEN4); + + // FFT of signal + __asm __volatile( + ".set 
push \n\t" + ".set noreorder \n\t" + "addiu %[shift], %[time_signal_scaling], -14 \n\t" + "addiu %[i], $zero, 64 \n\t" + "addiu %[load_ptr], %[time_signal], 0 \n\t" + "addiu %[hann], %[hanning], 0 \n\t" + "addiu %[hann1], %[hanning], 128 \n\t" + "addiu %[coefs], %[coefTable], 0 \n\t" + "bltz %[shift], 2f \n\t" + " negu %[shift1], %[shift] \n\t" + "1: " + "\n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "sllv %[tmp1], %[tmp1], %[shift] \n\t" + "sllv %[tmp3], %[tmp3], %[shift] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "b 3f \n\t" + " nop \n\t" + "2: " + "\n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "srav %[tmp1], %[tmp1], %[shift1] \n\t" + "srav %[tmp3], %[tmp3], %[shift1] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 2b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "3: " + "\n\t" + ".set pop \n\t" + : [load_ptr] "=&r"(load_ptr), [shift] "=&r"(shift), [hann] "=&r"(hann), + [hann1] "=&r"(hann1), [shift1] "=&r"(shift1), [coefs] "=&r"(coefs), + [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), + [tmp4] "=&r"(tmp4), [i] "=&r"(i), [f_coef] "=&r"(f_coef), + [s_coef] "=&r"(s_coef), [store_ptr1] "=&r"(store_ptr1), + [store_ptr2] "=&r"(store_ptr2) + : [time_signal] "r"(time_signal), [coefTable] "r"(coefTable), + [time_signal_scaling] "r"(time_signal_scaling), + [hanning] "r"(WebRtcAecm_kSqrtHanning), [fft] "r"(fft) + : "memory", "hi", "lo"); + + WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + pfrfi = fft; + pfreq_signal = freq_signal; + + __asm __volatile( + ".set push " + "\n\t" + ".set noreorder " + "\n\t" + "addiu %[j], $zero, 128 " + "\n\t" + "1: " + "\n\t" + "lh %[tmp1], 0(%[pfrfi]) " + "\n\t" + "lh %[tmp2], 2(%[pfrfi]) " + "\n\t" + "lh %[tmp3], 4(%[pfrfi]) " + "\n\t" + "lh %[tmp4], 6(%[pfrfi]) " + "\n\t" + "subu %[tmp2], $zero, %[tmp2] " + "\n\t" + "sh %[tmp1], 0(%[pfreq_signal]) " + "\n\t" + "sh %[tmp2], 2(%[pfreq_signal]) " + "\n\t" + "subu %[tmp4], $zero, %[tmp4] " + "\n\t" + "sh %[tmp3], 4(%[pfreq_signal]) " + "\n\t" + "sh %[tmp4], 6(%[pfreq_signal]) " + "\n\t" + "lh %[tmp1], 8(%[pfrfi]) " + "\n\t" + "lh %[tmp2], 10(%[pfrfi]) " + "\n\t" + "lh %[tmp3], 12(%[pfrfi]) " + "\n\t" + "lh %[tmp4], 14(%[pfrfi]) " + "\n\t" + "addiu %[j], %[j], -8 " + "\n\t" + "subu %[tmp2], $zero, %[tmp2] " + "\n\t" + "sh %[tmp1], 8(%[pfreq_signal]) " + "\n\t" + "sh %[tmp2], 10(%[pfreq_signal]) " + "\n\t" + "subu %[tmp4], $zero, %[tmp4] " + "\n\t" + "sh %[tmp3], 12(%[pfreq_signal]) " + "\n\t" + "sh 
%[tmp4], 14(%[pfreq_signal]) " + "\n\t" + "addiu %[pfreq_signal], %[pfreq_signal], 16 " + "\n\t" + "bgtz %[j], 1b " + "\n\t" + " addiu %[pfrfi], %[pfrfi], 16 " + "\n\t" + ".set pop " + "\n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), + [j] "=&r"(j), [pfrfi] "+r"(pfrfi), [pfreq_signal] "+r"(pfreq_signal), + [tmp4] "=&r"(tmp4) + : + : "memory"); +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, outCFFT; + int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im; + int16_t* pcoefTable_ifft = coefTable_ifft; + int16_t* pfft = fft; + int16_t* ppfft = fft; + ComplexInt16* pefw = efw; + int32_t out_aecm; + int16_t* paecm_buf = aecm->outBuf; + const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning; + const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN]; + int16_t* output1 = output; + + __asm __volatile( + ".set push " + "\n\t" + ".set noreorder " + "\n\t" + "addiu %[i], $zero, 64 " + "\n\t" + "1: " + "\n\t" + "lh %[tmp1], 0(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp2], 2(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp_re], 0(%[pefw]) " + "\n\t" + "lh %[tmp_im], 2(%[pefw]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp2] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp1] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "subu %[tmp_im], $zero, %[tmp_im] " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "lh %[tmp1], 4(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp2], 6(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp_re], 4(%[pefw]) " + "\n\t" + "lh %[tmp_im], 6(%[pefw]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp2] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp1] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "subu %[tmp_im], $zero, %[tmp_im] " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "lh %[tmp1], 8(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp2], 10(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp_re], 8(%[pefw]) " + "\n\t" + "lh %[tmp_im], 10(%[pefw]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp2] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp1] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "subu %[tmp_im], $zero, %[tmp_im] " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "lh %[tmp1], 12(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp2], 14(%[pcoefTable_ifft]) " + "\n\t" + "lh %[tmp_re], 12(%[pefw]) " + "\n\t" + "lh %[tmp_im], 14(%[pefw]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp2] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addu %[pfft], %[fft], %[tmp1] " + "\n\t" + "sh %[tmp_re], 0(%[pfft]) " + "\n\t" + "subu %[tmp_im], $zero, %[tmp_im] " + "\n\t" + "sh %[tmp_im], 2(%[pfft]) " + "\n\t" + "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 " + "\n\t" + "addiu %[i], %[i], -4 " + "\n\t" + "bgtz %[i], 1b " + "\n\t" + " addiu %[pefw], %[pefw], 16 " + "\n\t" + ".set pop " + "\n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [pfft] "+r"(pfft), [i] "=&r"(i), + [tmp_re] "=&r"(tmp_re), [tmp_im] "=&r"(tmp_im), [pefw] "+r"(pefw), + [pcoefTable_ifft] "+r"(pcoefTable_ifft), [fft] "+r"(fft) + : + : "memory"); + + fft[2] = efw[PART_LEN].real; + fft[3] = -efw[PART_LEN].imag; + + outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); + pfft = fft; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 128 
\n\t" + "1: \n\t" + "lh %[tmp1], 0(%[ppfft]) \n\t" + "lh %[tmp2], 4(%[ppfft]) \n\t" + "lh %[tmp3], 8(%[ppfft]) \n\t" + "lh %[tmp4], 12(%[ppfft]) \n\t" + "addiu %[i], %[i], -4 \n\t" + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp2], 2(%[pfft]) \n\t" + "sh %[tmp3], 4(%[pfft]) \n\t" + "sh %[tmp4], 6(%[pfft]) \n\t" + "addiu %[ppfft], %[ppfft], 16 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pfft], %[pfft], 8 \n\t" + ".set pop \n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [pfft] "+r"(pfft), [i] "=&r"(i), + [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4), [ppfft] "+r"(ppfft) + : + : "memory"); + + pfft = fft; + out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain); + + __asm __volatile( + ".set push " + "\n\t" + ".set noreorder " + "\n\t" + "addiu %[i], $zero, 64 " + "\n\t" + "11: " + "\n\t" + "lh %[tmp1], 0(%[pfft]) " + "\n\t" + "lh %[tmp2], 0(%[p_kSqrtHanning]) " + "\n\t" + "addiu %[i], %[i], -2 " + "\n\t" + "mul %[tmp1], %[tmp1], %[tmp2] " + "\n\t" + "lh %[tmp3], 2(%[pfft]) " + "\n\t" + "lh %[tmp4], 2(%[p_kSqrtHanning]) " + "\n\t" + "mul %[tmp3], %[tmp3], %[tmp4] " + "\n\t" + "addiu %[tmp1], %[tmp1], 8192 " + "\n\t" + "sra %[tmp1], %[tmp1], 14 " + "\n\t" + "addiu %[tmp3], %[tmp3], 8192 " + "\n\t" + "sra %[tmp3], %[tmp3], 14 " + "\n\t" + "bgez %[out_aecm], 1f " + "\n\t" + " negu %[tmp2], %[out_aecm] " + "\n\t" + "srav %[tmp1], %[tmp1], %[tmp2] " + "\n\t" + "b 2f " + "\n\t" + " srav %[tmp3], %[tmp3], %[tmp2] " + "\n\t" + "1: " + "\n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] " + "\n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] " + "\n\t" + "2: " + "\n\t" + "lh %[tmp4], 0(%[paecm_buf]) " + "\n\t" + "lh %[tmp2], 2(%[paecm_buf]) " + "\n\t" + "addu %[tmp3], %[tmp3], %[tmp2] " + "\n\t" + "addu %[tmp1], %[tmp1], %[tmp4] " + "\n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 " + "\n\t" + "sra %[tmp1], %[tmp1], 16 " + "\n\t" + "shll_s.w %[tmp3], %[tmp3], 16 " + "\n\t" + "sra %[tmp3], %[tmp3], 16 " + "\n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 " + "\n\t" + "sra %[tmp2], %[tmp1], 15 " + "\n\t" + "beq %[tmp4], %[tmp2], 3f " + "\n\t" + " ori %[tmp2], $zero, 0x7fff " + "\n\t" + "xor %[tmp1], %[tmp2], %[tmp4] " + "\n\t" + "3: " + "\n\t" + "sra %[tmp2], %[tmp3], 31 " + "\n\t" + "sra %[tmp4], %[tmp3], 15 " + "\n\t" + "beq %[tmp2], %[tmp4], 4f " + "\n\t" + " ori %[tmp4], $zero, 0x7fff " + "\n\t" + "xor %[tmp3], %[tmp4], %[tmp2] " + "\n\t" + "4: " + "\n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[pfft]) " + "\n\t" + "sh %[tmp1], 0(%[output1]) " + "\n\t" + "sh %[tmp3], 2(%[pfft]) " + "\n\t" + "sh %[tmp3], 2(%[output1]) " + "\n\t" + "lh %[tmp1], 128(%[pfft]) " + "\n\t" + "lh %[tmp2], 0(%[pp_kSqrtHanning]) " + "\n\t" + "mul %[tmp1], %[tmp1], %[tmp2] " + "\n\t" + "lh %[tmp3], 130(%[pfft]) " + "\n\t" + "lh %[tmp4], -2(%[pp_kSqrtHanning]) " + "\n\t" + "mul %[tmp3], %[tmp3], %[tmp4] " + "\n\t" + "sra %[tmp1], %[tmp1], 14 " + "\n\t" + "sra %[tmp3], %[tmp3], 14 " + "\n\t" + "bgez %[out_aecm], 5f " + "\n\t" + " negu %[tmp2], %[out_aecm] " + "\n\t" + "srav %[tmp3], %[tmp3], %[tmp2] " + "\n\t" + "b 6f " + "\n\t" + " srav %[tmp1], %[tmp1], %[tmp2] " + "\n\t" + "5: " + "\n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] " + "\n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] " + "\n\t" + "6: " + "\n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 " + "\n\t" + "sra %[tmp1], %[tmp1], 16 " + "\n\t" + "shll_s.w %[tmp3], %[tmp3], 16 " + "\n\t" + "sra %[tmp3], %[tmp3], 16 " + "\n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 " + "\n\t" + "sra %[tmp2], %[tmp1], 
15 " + "\n\t" + "beq %[tmp4], %[tmp2], 7f " + "\n\t" + " ori %[tmp2], $zero, 0x7fff " + "\n\t" + "xor %[tmp1], %[tmp2], %[tmp4] " + "\n\t" + "7: " + "\n\t" + "sra %[tmp2], %[tmp3], 31 " + "\n\t" + "sra %[tmp4], %[tmp3], 15 " + "\n\t" + "beq %[tmp2], %[tmp4], 8f " + "\n\t" + " ori %[tmp4], $zero, 0x7fff " + "\n\t" + "xor %[tmp3], %[tmp4], %[tmp2] " + "\n\t" + "8: " + "\n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[paecm_buf]) " + "\n\t" + "sh %[tmp3], 2(%[paecm_buf]) " + "\n\t" + "addiu %[output1], %[output1], 4 " + "\n\t" + "addiu %[paecm_buf], %[paecm_buf], 4 " + "\n\t" + "addiu %[pfft], %[pfft], 4 " + "\n\t" + "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 " + "\n\t" + "bgtz %[i], 11b " + "\n\t" + " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 " + "\n\t" + ".set pop " + "\n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [pfft] "+r"(pfft), + [output1] "+r"(output1), [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4), + [paecm_buf] "+r"(paecm_buf), [i] "=&r"(i), + [pp_kSqrtHanning] "+r"(pp_kSqrtHanning), + [p_kSqrtHanning] "+r"(p_kSqrtHanning) + : [out_aecm] "r"(out_aecm), + [WebRtcAecm_kSqrtHanning] "r"(WebRtcAecm_kSqrtHanning) + : "hi", "lo", "memory"); + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int i; + uint32_t par1 = (*far_energy); + uint32_t par2 = (*echo_energy_adapt); + uint32_t par3 = (*echo_energy_stored); + int16_t* ch_stored_p = &(aecm->channelStored[0]); + int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]); + uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0])); + int32_t* echo_p = &(echo_est[0]); + int32_t temp0, stored0, echo0, adept0, spectrum0; + int32_t stored1, adept1, spectrum1, echo1, temp1; + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. 
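+  // [Editor's note, a scalar sketch (not upstream code) of what the unrolled
+  // MIPS loop below computes for each bin i:
+  //   echo_est[i]         = channelStored[i] * far_spectrum[i];
+  //   far_energy         += far_spectrum[i];
+  //   echo_energy_adapt  += channelAdapt16[i] * far_spectrum[i];
+  //   echo_energy_stored += echo_est[i];
+  // mirroring the scalar handling of bin PART_LEN after the loop.]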
+ for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[stored0], 0(%[ch_stored_p]) \n\t" + "lhu %[adept0], 0(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 0(%[spectrum_p]) \n\t" + "lh %[stored1], 2(%[ch_stored_p]) \n\t" + "lhu %[adept1], 2(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 2(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[echo_p], %[echo_p], 16 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -16(%[echo_p]) \n\t" + "usw %[echo1], -12(%[echo_p]) \n\t" + "lh %[stored0], 4(%[ch_stored_p]) \n\t" + "lhu %[adept0], 4(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 4(%[spectrum_p]) \n\t" + "lh %[stored1], 6(%[ch_stored_p]) \n\t" + "lhu %[adept1], 6(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 6(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t" + "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t" + "addiu %[spectrum_p], %[spectrum_p], 8 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -8(%[echo_p]) \n\t" + "usw %[echo1], -4(%[echo_p]) \n\t" + ".set pop \n\t" + : [temp0] "=&r"(temp0), [stored0] "=&r"(stored0), + [adept0] "=&r"(adept0), [spectrum0] "=&r"(spectrum0), + [echo0] "=&r"(echo0), [echo_p] "+r"(echo_p), [par3] "+r"(par3), + [par1] "+r"(par1), [par2] "+r"(par2), [stored1] "=&r"(stored1), + [adept1] "=&r"(adept1), [echo1] "=&r"(echo1), + [spectrum1] "=&r"(spectrum1), [temp1] "=&r"(temp1), + [ch_stored_p] "+r"(ch_stored_p), [ch_adapt_p] "+r"(ch_adapt_p), + [spectrum_p] "+r"(spectrum_p) + : + : "hi", "lo", "memory"); + } + + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + par1 += (uint32_t)(far_spectrum[PART_LEN]); + par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; + par3 += (uint32_t)echo_est[PART_LEN]; + + (*far_energy) = par1; + (*echo_energy_adapt) = par2; + (*echo_energy_stored) = par3; +} + +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + int i; + int16_t* temp1; + uint16_t* temp8; + int32_t temp0, temp2, temp3, temp4, temp5, temp6; + int32_t* temp7 = &(echo_est[0]); + temp1 = &(aecm->channelStored[0]); + temp8 = (uint16_t*)(&far_spectrum[0]); + + // During startup we store the channel every block. 
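+  // [Editor's note, an assumption about the DSP intrinsics rather than
+  // upstream text: muleq_s.w.phr/.phl yield doubled Q15 products, so the
+  // `sra 1` in the loop below halves them, leaving the plain products
+  //   echo_est[i] = channelStored[i] * far_spectrum[i];
+  // as in the scalar expression used for the final bin.]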
+ memcpy(aecm->channelStored, aecm->channelAdapt16, + sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile( + "ulw %[temp0], 0(%[temp8]) \n\t" + "ulw %[temp2], 0(%[temp1]) \n\t" + "ulw %[temp4], 4(%[temp8]) \n\t" + "ulw %[temp5], 4(%[temp1]) \n\t" + "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t" + "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t" + "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t" + "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t" + "addiu %[temp7], %[temp7], 16 \n\t" + "addiu %[temp1], %[temp1], 8 \n\t" + "addiu %[temp8], %[temp8], 8 \n\t" + "sra %[temp3], %[temp3], 1 \n\t" + "sra %[temp0], %[temp0], 1 \n\t" + "sra %[temp6], %[temp6], 1 \n\t" + "sra %[temp4], %[temp4], 1 \n\t" + "usw %[temp3], -12(%[temp7]) \n\t" + "usw %[temp0], -16(%[temp7]) \n\t" + "usw %[temp6], -4(%[temp7]) \n\t" + "usw %[temp4], -8(%[temp7]) \n\t" + : [temp0] "=&r"(temp0), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), + [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), + [temp1] "+r"(temp1), [temp8] "+r"(temp8), [temp7] "+r"(temp7) + : + : "hi", "lo", "memory"); + } + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]); +} + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) { + int i; + int32_t* temp3; + int16_t* temp0; + int32_t temp1, temp2, temp4, temp5; + + temp0 = &(aecm->channelStored[0]); + temp3 = &(aecm->channelAdapt32[0]); + + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. + memcpy(aecm->channelAdapt16, aecm->channelStored, + sizeof(int16_t) * PART_LEN1); + + // Restore the W32 channel + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile( + "ulw %[temp1], 0(%[temp0]) \n\t" + "ulw %[temp4], 4(%[temp0]) \n\t" + "preceq.w.phl %[temp2], %[temp1] \n\t" + "preceq.w.phr %[temp1], %[temp1] \n\t" + "preceq.w.phl %[temp5], %[temp4] \n\t" + "preceq.w.phr %[temp4], %[temp4] \n\t" + "addiu %[temp0], %[temp0], 8 \n\t" + "usw %[temp2], 4(%[temp3]) \n\t" + "usw %[temp1], 0(%[temp3]) \n\t" + "usw %[temp5], 12(%[temp3]) \n\t" + "usw %[temp4], 8(%[temp3]) \n\t" + "addiu %[temp3], %[temp3], 16 \n\t" + : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp4] "=&r"(temp4), + [temp5] "=&r"(temp5), [temp3] "+r"(temp3), [temp0] "+r"(temp0) + : + : "memory"); + } + + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; +} +#endif // #if defined(MIPS_DSP_R1_LE) + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. +// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + // In fft_buf, +16 for 32-byte alignment. 
+ int16_t fft_buf[PART_LEN4 + 16]; + int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31); + + int16_t tmp16no1; +#if !defined(MIPS_DSP_R2_LE) + int32_t tmp32no1; + int32_t tmp32no2; + int16_t tmp16no2; +#else + int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13; + int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23; + int16_t* freqp; + uint16_t* freqabsp; + uint32_t freqt0, freqt1, freqt2, freqt3; + uint32_t freqs; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, + // calculate the magnitude for all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal[PART_LEN].real = fft[PART_LEN2]; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = + (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = + (uint32_t)(freq_signal_abs[0]) + (uint32_t)(freq_signal_abs[PART_LEN]); + +#if !defined(MIPS_DSP_R2_LE) + for (i = 1; i < PART_LEN; i++) { + if (freq_signal[i].real == 0) { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + } else if (freq_signal[i].imag == 0) { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real); + } else { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(`imag`,`real`) + beta * min(`imag`,`real`) + // + // The parameters alpha and beta are stored in Q15 + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } +#else // #if !defined(MIPS_DSP_R2_LE) + freqs = + (uint32_t)(freq_signal_abs[0]) + (uint32_t)(freq_signal_abs[PART_LEN]); + freqp = &(freq_signal[1].real); + + __asm __volatile( + "lw %[freqt0], 0(%[freqp]) \n\t" + "lw %[freqt1], 4(%[freqp]) \n\t" + "lw %[freqt2], 8(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "addiu %[freqp], %[freqp], 12 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + : [freqt0] "=&r"(freqt0), [freqt1] "=&r"(freqt1), [freqt2] "=&r"(freqt2), + [freqp] "+r"(freqp), [tmp32no20] "=r"(tmp32no20), + [tmp32no21] "=r"(tmp32no21), [tmp32no22] "=r"(tmp32no22) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo"); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + freq_signal_abs[1] = (uint16_t)tmp32no10; + freq_signal_abs[2] = (uint16_t)tmp32no11; + freq_signal_abs[3] = (uint16_t)tmp32no12; + freqs += (uint32_t)tmp32no10; + freqs += (uint32_t)tmp32no11; + freqs += (uint32_t)tmp32no12; + freqabsp = &(freq_signal_abs[4]); + for (i = 4; i < PART_LEN; i += 4) { + __asm __volatile( + "ulw %[freqt0], 0(%[freqp]) \n\t" + "ulw %[freqt1], 4(%[freqp]) \n\t" + "ulw %[freqt2], 8(%[freqp]) \n\t" + "ulw %[freqt3], 
12(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "mult $ac3, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t" + "addiu %[freqp], %[freqp], 16 \n\t" + "addiu %[freqabsp], %[freqabsp], 8 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + "extr.w %[tmp32no23], $ac3, 1 \n\t" + : [freqt0] "=&r"(freqt0), [freqt1] "=&r"(freqt1), + [freqt2] "=&r"(freqt2), [freqt3] "=&r"(freqt3), + [tmp32no20] "=r"(tmp32no20), [tmp32no21] "=r"(tmp32no21), + [tmp32no22] "=r"(tmp32no22), [tmp32no23] "=r"(tmp32no23), + [freqabsp] "+r"(freqabsp), [freqp] "+r"(freqp) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo", + "$ac3hi", "$ac3lo"); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23); + + __asm __volatile( + "sh %[tmp32no10], -8(%[freqabsp]) \n\t" + "sh %[tmp32no11], -6(%[freqabsp]) \n\t" + "sh %[tmp32no12], -4(%[freqabsp]) \n\t" + "sh %[tmp32no13], -2(%[freqabsp]) \n\t" + "addu %[freqs], %[freqs], %[tmp32no10] \n\t" + "addu %[freqs], %[freqs], %[tmp32no11] \n\t" + "addu %[freqs], %[freqs], %[tmp32no12] \n\t" + "addu %[freqs], %[freqs], %[tmp32no13] \n\t" + : [freqs] "+r"(freqs) + : [tmp32no10] "r"(tmp32no10), [tmp32no11] "r"(tmp32no11), + [tmp32no12] "r"(tmp32no12), [tmp32no13] "r"(tmp32no13), + [freqabsp] "r"(freqabsp) + : "memory"); + } + + (*freq_signal_sum_abs) = freqs; +#endif + + return time_signal_scaling; +} + +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. + int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~31); + int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~31); + ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; + int16_t* ptr; + int16_t* ptr1; + int16_t* er_ptr; + int16_t* dr_ptr; + + ptr = &hnl[0]; + ptr1 = &hnl[0]; + er_ptr = &efw[0].real; + dr_ptr = &dfw[0].real; + + // Determine startup state. 
There are three states:
+  // (0) the first CONV_LEN blocks
+  // (1) another CONV_LEN blocks
+  // (2) the rest
+
+  if (aecm->startupState < 2) {
+    aecm->startupState =
+        (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2);
+  }
+  // END: Determine startup state
+
+  // Buffer near and far end signals
+  memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
+  memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
+  if (nearendClean != NULL) {
+    memcpy(aecm->dBufClean + PART_LEN, nearendClean,
+           sizeof(int16_t) * PART_LEN);
+  }
+
+  // Transform far end signal from time domain to frequency domain.
+  far_q = TimeToFrequencyDomain(aecm, aecm->xBuf, dfw, xfa, &xfaSum);
+
+  // Transform noisy near end signal from time domain to frequency domain.
+  zerosDBufNoisy =
+      TimeToFrequencyDomain(aecm, aecm->dBufNoisy, dfw, dfaNoisy, &dfaNoisySum);
+  aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
+  aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
+
+  if (nearendClean == NULL) {
+    ptrDfaClean = dfaNoisy;
+    aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
+    aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
+    dfaCleanSum = dfaNoisySum;
+  } else {
+    // Transform clean near end signal from time domain to frequency domain.
+    zerosDBufClean = TimeToFrequencyDomain(aecm, aecm->dBufClean, dfw, dfaClean,
+                                           &dfaCleanSum);
+    aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
+    aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
+  }
+
+  // Get the delay
+  // Save far-end history and estimate delay
+  WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
+
+  if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,
+                               far_q) == -1) {
+    return -1;
+  }
+  delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, dfaNoisy,
+                                          PART_LEN1, zerosDBufNoisy);
+  if (delay == -1) {
+    return -1;
+  } else if (delay == -2) {
+    // If the delay is unknown, we assume zero.
+    // NOTE: this will have to be adjusted if we ever add lookahead.
+    delay = 0;
+  }
+
+  if (aecm->fixedDelay >= 0) {
+    // Use fixed delay
+    delay = aecm->fixedDelay;
+  }
+
+  // Get aligned far end spectrum
+  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
+  zerosXBuf = (int16_t)far_q;
+
+  if (far_spectrum_ptr == NULL) {
+    return -1;
+  }
+
+  // Calculate log(energy) and update energy threshold levels
+  WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum,
+                          echoEst32);
+  // Calculate stepsize
+  mu = WebRtcAecm_CalcStepSize(aecm);
+
+  // Update counters
+  aecm->totCount++;
+
+  // This is the channel estimation algorithm.
+  // It is based on NLMS but has a variable step length,
+  // which was calculated above.
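+  // [Editor's note, a rough floating-point NLMS sketch, not the upstream
+  // fixed-point code: per frequency bin,
+  //   e[i]        = dfaNoisy[i] - channel[i] * far_spectrum[i];
+  //   channel[i] += mu * e[i] * far_spectrum[i] / ||far_spectrum||^2;
+  // with the step size `mu` computed by WebRtcAecm_CalcStepSize() above.]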
+  WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu,
+                           echoEst32);
+
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++) {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] +=
+        rtc::dchecked_cast<int32_t>((int64_t{tmp32no1} * 50) >> 8);
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16) {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
+      echoEst32Gained =
+          WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff =
+          14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1) {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                                supGain >> tmp16no1);
+      } else {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    RTC_DCHECK_GE(zeros16, 0);  // `zeros16` is a norm, hence non-negative.
+    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+      tmp16no1 = aecm->nearFilt[i] << zeros16;
+      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+    } else {
+      tmp16no1 = dfa_clean_q_domain_diff < 0
+                     ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+                     : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)(tmp32no1 >> 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] =
+          qDomainDiff < 0 ? tmp16no2 << -qDomainDiff : tmp16no2 >> qDomainDiff;
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0) {
+      hnl[i] = ONE_Q14;
+      numPosCoef++;
+    } else if (aecm->nearFilt[i] == 0) {
+      hnl[i] = 0;
+    } else {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 =
+          WebRtcSpl_DivU32U16(echoEst32Gained, (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN
+      //    - max(0, 17 - zeros16 - zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14) {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0) {
+        hnl[i] = ONE_Q14;
+        numPosCoef++;
+      } else {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] <= 0) {
+          hnl[i] = 0;
+        } else {
+          numPosCoef++;
+        }
+      }
+    }
+  }
+
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2) {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    // speech distortion in double-talk.
+ for (i = 0; i < (PART_LEN1 >> 3); i++) { + __asm __volatile( + "lh %[temp1], 0(%[ptr1]) \n\t" + "lh %[temp2], 2(%[ptr1]) \n\t" + "lh %[temp3], 4(%[ptr1]) \n\t" + "lh %[temp4], 6(%[ptr1]) \n\t" + "lh %[temp5], 8(%[ptr1]) \n\t" + "lh %[temp6], 10(%[ptr1]) \n\t" + "lh %[temp7], 12(%[ptr1]) \n\t" + "lh %[temp8], 14(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "mul %[temp2], %[temp2], %[temp2] \n\t" + "mul %[temp3], %[temp3], %[temp3] \n\t" + "mul %[temp4], %[temp4], %[temp4] \n\t" + "mul %[temp5], %[temp5], %[temp5] \n\t" + "mul %[temp6], %[temp6], %[temp6] \n\t" + "mul %[temp7], %[temp7], %[temp7] \n\t" + "mul %[temp8], %[temp8], %[temp8] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "sra %[temp4], %[temp4], 14 \n\t" + "sra %[temp5], %[temp5], 14 \n\t" + "sra %[temp6], %[temp6], 14 \n\t" + "sra %[temp7], %[temp7], 14 \n\t" + "sra %[temp8], %[temp8], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "sh %[temp2], 2(%[ptr1]) \n\t" + "sh %[temp3], 4(%[ptr1]) \n\t" + "sh %[temp4], 6(%[ptr1]) \n\t" + "sh %[temp5], 8(%[ptr1]) \n\t" + "sh %[temp6], 10(%[ptr1]) \n\t" + "sh %[temp7], 12(%[ptr1]) \n\t" + "sh %[temp8], 14(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 16 \n\t" + : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), + [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), + [temp7] "=&r"(temp7), [temp8] "=&r"(temp8), [ptr1] "+r"(ptr1) + : + : "memory", "hi", "lo"); + } + for (i = 0; i < (PART_LEN1 & 7); i++) { + __asm __volatile( + "lh %[temp1], 0(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 2 \n\t" + : [temp1] "=&r"(temp1), [ptr1] "+r"(ptr1) + : + : "memory", "hi", "lo"); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { + avgHnl32 += (int32_t)hnl[i]; + } + + RTC_DCHECK_GT(kMaxPrefBand - kMinPrefBand + 1, 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) { + if (hnl[i] > (int16_t)avgHnl32) { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) { + if (numPosCoef < 3) { + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = 0; + efw[i].imag = 0; + hnl[i] = 0; + } + } else { + for (i = 0; i < PART_LEN1; i++) { +#if defined(MIPS_DSP_R1_LE) + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "shra_r.w %[temp2], %[temp2], 14 \n\t" + "shra_r.w %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), + [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [ptr] "+r"(ptr), + [er_ptr] "+r"(er_ptr), [dr_ptr] "+r"(dr_ptr) + : + : "memory", "hi", "lo"); 
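+        // Scalar sketch of this NLP kernel (both the MIPS_DSP_R1_LE variant
+        // above and the generic variant below implement the same thing; only
+        // the rounding instructions differ):
+        //
+        //   if (hnl[i] > ONE_Q14) {
+        //     hnl[i] = ONE_Q14;                  // clamp the gain to 1.0
+        //     efw[i] = dfw[i];                   // pass the bin through
+        //   } else if (hnl[i] < NLP_COMP_LOW) {  // below 0.2 in Q14
+        //     hnl[i] = 0;                        // suppress the bin entirely
+        //     efw[i].real = efw[i].imag = 0;
+        //   } else {
+        //     efw[i].real = (int16_t)((dfw[i].real * hnl[i] + (1 << 13)) >> 14);
+        //     efw[i].imag = (int16_t)((dfw[i].imag * hnl[i] + (1 << 13)) >> 14);
+        //   }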
+#else + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "addiu %[temp2], %[temp2], 0x2000 \n\t" + "addiu %[temp3], %[temp3], 0x2000 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), + [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [ptr] "+r"(ptr), + [er_ptr] "+r"(er_ptr), [dr_ptr] "+r"(dr_ptr) + : + : "memory", "hi", "lo"); +#endif + } + } + } else { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, hnl[i], 14)); + efw[i].imag = (int16_t)( + WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, hnl[i], 14)); + } + } + + if (aecm->cngMode == AecmTrue) { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +// Generate comfort noise and add to output signal. +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda) { + int16_t i; + int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2; + int32_t tmp32, tmp321, tnoise, tnoise1; + int32_t tmp322, tmp323, *tmp1; + int16_t* dfap; + int16_t* lambdap; + const int32_t c2049 = 2049; + const int32_t c359 = 359; + const int32_t c114 = ONE_Q14; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t minTrackShift = 9; + + RTC_DCHECK_GE(shiftFromNearToNoise, 0); + RTC_DCHECK_LT(shiftFromNearToNoise, 16); + + if (aecm->noiseEstCtr < 100) { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + int16_t* randW16p = (int16_t*)randW16; +#if defined(MIPS_DSP_R1_LE) + int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable; + int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable; +#endif // #if defined(MIPS_DSP_R1_LE) + tmp1 = (int32_t*)aecm->noiseEst + 1; + dfap = (int16_t*)dfa + 1; + lambdap = (int16_t*)lambda + 1; + // Estimate noise power. + for (i = 1; i < PART_LEN1; i += 2) { + // Shift to the noise domain. 
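+    // The assembly below interleaves two bins per loop iteration; per bin it
+    // behaves roughly like this (the counter-based paths for very small and
+    // very large values are omitted from the sketch):
+    //
+    //   outLShift32 = dfa[i] << shiftFromNearToNoise;
+    //   if (outLShift32 < noiseEst[i]) {
+    //     // Track the minimum.
+    //     noiseEst[i] -= (noiseEst[i] - outLShift32) >> minTrackShift;
+    //   } else {
+    //     // Ramp up slowly: 2049/2048 is an increase of about 0.05%.
+    //     noiseEst[i] = (noiseEst[i] * 2049) >> 11;
+    //   }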
+ __asm __volatile( + "lh %[tmp32], 0(%[dfap]) \n\t" + "lw %[tnoise], 0(%[tmp1]) \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r"(tmp32), [outLShift32] "=r"(outLShift32), + [tnoise] "=&r"(tnoise) + : [tmp1] "r"(tmp1), [dfap] "r"(dfap), + [shiftFromNearToNoise] "r"(shiftFromNearToNoise) + : "memory"); + + if (outLShift32 < tnoise) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (tnoise < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // `kNoiseEstIncCount` block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) { + tnoise--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } else { + __asm __volatile( + "subu %[tmp32], %[tnoise], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tmp32] "=&r"(tmp32), [tnoise] "+r"(tnoise) + : + [outLShift32] "r"(outLShift32), [minTrackShift] "r"(minTrackShift)); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise >> 19) <= 0) { + if ((tnoise >> 11) > 0) { + // Large enough for relative increase + __asm __volatile( + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + "sra %[tnoise], %[tnoise], 11 \n\t" + : [tnoise] "+r"(tnoise) + : [c2049] "r"(c2049) + : "hi", "lo"); + } else { + // Make incremental increases based on size every + // `kNoiseEstIncCount` block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { + __asm __volatile( + "sra %[tmp32], %[tnoise], 9 \n\t" + "addi %[tnoise], %[tnoise], 1 \n\t" + "addu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tnoise] "+r"(tnoise), [tmp32] "=&r"(tmp32) + :); + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + __asm __volatile( + "sra %[tnoise], %[tnoise], 11 \n\t" + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + : [tnoise] "+r"(tnoise) + : [c2049] "r"(c2049) + : "hi", "lo"); + } + } + + // Shift to the noise domain. + __asm __volatile( + "lh %[tmp32], 2(%[dfap]) \n\t" + "lw %[tnoise1], 4(%[tmp1]) \n\t" + "addiu %[dfap], %[dfap], 4 \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r"(tmp32), [dfap] "+r"(dfap), + [outLShift32] "=r"(outLShift32), [tnoise1] "=&r"(tnoise1) + : [tmp1] "r"(tmp1), [shiftFromNearToNoise] "r"(shiftFromNearToNoise) + : "memory"); + + if (outLShift32 < tnoise1) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i + 1] = 0; + // Track the minimum. + if (tnoise1 < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // `kNoiseEstIncCount` block. The regular approach below can not + // go further down due to truncation. 
+ aecm->noiseEstTooHighCtr[i + 1]++; + if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) { + tnoise1--; + aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter + } + } else { + __asm __volatile( + "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tmp32] "=&r"(tmp32), [tnoise1] "+r"(tnoise1) + : + [outLShift32] "r"(outLShift32), [minTrackShift] "r"(minTrackShift)); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i + 1] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise1 >> 19) <= 0) { + if ((tnoise1 >> 11) > 0) { + // Large enough for relative increase + __asm __volatile( + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + "sra %[tnoise1], %[tnoise1], 11 \n\t" + : [tnoise1] "+r"(tnoise1) + : [c2049] "r"(c2049) + : "hi", "lo"); + } else { + // Make incremental increases based on size every + // `kNoiseEstIncCount` block + aecm->noiseEstTooLowCtr[i + 1]++; + if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) { + __asm __volatile( + "sra %[tmp32], %[tnoise1], 9 \n\t" + "addi %[tnoise1], %[tnoise1], 1 \n\t" + "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tnoise1] "+r"(tnoise1), [tmp32] "=&r"(tmp32) + :); + aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. Scale + // down first and then multiply + __asm __volatile( + "sra %[tnoise1], %[tnoise1], 11 \n\t" + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + : [tnoise1] "+r"(tnoise1) + : [c2049] "r"(c2049) + : "hi", "lo"); + } + } + + __asm __volatile( + "lh %[tmp16], 0(%[lambdap]) \n\t" + "lh %[tmp161], 2(%[lambdap]) \n\t" + "sw %[tnoise], 0(%[tmp1]) \n\t" + "sw %[tnoise1], 4(%[tmp1]) \n\t" + "subu %[tmp16], %[c114], %[tmp16] \n\t" + "subu %[tmp161], %[c114], %[tmp161] \n\t" + "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t" + "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t" + "addiu %[lambdap], %[lambdap], 4 \n\t" + "addiu %[tmp1], %[tmp1], 8 \n\t" + : [tmp16] "=&r"(tmp16), [tmp161] "=&r"(tmp161), [tmp1] "+r"(tmp1), + [tmp32] "=&r"(tmp32), [tmp321] "=&r"(tmp321), [lambdap] "+r"(lambdap) + : [tnoise] "r"(tnoise), [tnoise1] "r"(tnoise1), [c114] "r"(c114), + [shiftFromNearToNoise] "r"(shiftFromNearToNoise) + : "memory"); + + if (tmp32 > 32767) { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + if (tmp321 > 32767) { + tmp321 = 32767; + aecm->noiseEst[i + 1] = tmp321 << shiftFromNearToNoise; + } + + __asm __volatile( + "mul %[tmp32], %[tmp32], %[tmp16] \n\t" + "mul %[tmp321], %[tmp321], %[tmp161] \n\t" + "sra %[nrsh1], %[tmp32], 14 \n\t" + "sra %[nrsh2], %[tmp321], 14 \n\t" + : [nrsh1] "=&r"(nrsh1), [nrsh2] "=r"(nrsh2) + : [tmp16] "r"(tmp16), [tmp161] "r"(tmp161), [tmp32] "r"(tmp32), + [tmp321] "r"(tmp321) + : "memory", "hi", "lo"); + + __asm __volatile( + "lh %[tmp32], 0(%[randW16p]) \n\t" + "lh %[tmp321], 2(%[randW16p]) \n\t" + "addiu %[randW16p], %[randW16p], 4 \n\t" + "mul %[tmp32], %[tmp32], %[c359] \n\t" + "mul %[tmp321], %[tmp321], %[c359] \n\t" + "sra %[tmp16], %[tmp32], 15 \n\t" + "sra %[tmp161], %[tmp321], 15 \n\t" + : [randW16p] "+r"(randW16p), [tmp32] "=&r"(tmp32), [tmp16] "=r"(tmp16), + [tmp161] "=r"(tmp161), [tmp321] "=&r"(tmp321) + : [c359] "r"(c359) + : "memory", "hi", "lo"); + +#if !defined(MIPS_DSP_R1_LE) + tmp32 = WebRtcAecm_kCosTable[tmp16]; + tmp321 = WebRtcAecm_kSinTable[tmp16]; + tmp322 = WebRtcAecm_kCosTable[tmp161]; + tmp323 
= WebRtcAecm_kSinTable[tmp161];
+#else
+    __asm __volatile(
+        "sll     %[tmp16],      %[tmp16],       1             \n\t"
+        "sll     %[tmp161],     %[tmp161],      1             \n\t"
+        "lhx     %[tmp32],      %[tmp16](%[kCosTablep])       \n\t"
+        "lhx     %[tmp321],     %[tmp16](%[kSinTablep])       \n\t"
+        "lhx     %[tmp322],     %[tmp161](%[kCosTablep])      \n\t"
+        "lhx     %[tmp323],     %[tmp161](%[kSinTablep])      \n\t"
+        : [tmp32] "=&r"(tmp32), [tmp321] "=&r"(tmp321), [tmp322] "=&r"(tmp322),
+          [tmp323] "=&r"(tmp323)
+        : [kCosTablep] "r"(kCosTablep), [tmp16] "r"(tmp16),
+          [tmp161] "r"(tmp161), [kSinTablep] "r"(kSinTablep)
+        : "memory");
+#endif
+    __asm __volatile(
+        "mul     %[tmp32],      %[tmp32],       %[nrsh1]      \n\t"
+        "negu    %[tmp162],     %[nrsh1]                      \n\t"
+        "mul     %[tmp322],     %[tmp322],      %[nrsh2]      \n\t"
+        "negu    %[tmp163],     %[nrsh2]                      \n\t"
+        "sra     %[tmp32],      %[tmp32],       13            \n\t"
+        "mul     %[tmp321],     %[tmp321],      %[tmp162]     \n\t"
+        "sra     %[tmp322],     %[tmp322],      13            \n\t"
+        "mul     %[tmp323],     %[tmp323],      %[tmp163]     \n\t"
+        "sra     %[tmp321],     %[tmp321],      13            \n\t"
+        "sra     %[tmp323],     %[tmp323],      13            \n\t"
+        : [tmp32] "+r"(tmp32), [tmp321] "+r"(tmp321), [tmp162] "=&r"(tmp162),
+          [tmp322] "+r"(tmp322), [tmp323] "+r"(tmp323), [tmp163] "=&r"(tmp163)
+        : [nrsh1] "r"(nrsh1), [nrsh2] "r"(nrsh2)
+        : "hi", "lo");
+    // Tables are in Q13.
+    uReal[i] = (int16_t)tmp32;
+    uImag[i] = (int16_t)tmp321;
+    uReal[i + 1] = (int16_t)tmp322;
+    uImag[i + 1] = (int16_t)tmp323;
+  }
+
+  int32_t tt, sgn;
+  tt = out[0].real;
+  sgn = ((int)tt) >> 31;
+  out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  tt = out[0].imag;
+  sgn = ((int)tt) >> 31;
+  out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  for (i = 1; i < PART_LEN; i++) {
+    tt = out[i].real + uReal[i];
+    sgn = ((int)tt) >> 31;
+    out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+    tt = out[i].imag + uImag[i];
+    sgn = ((int)tt) >> 31;
+    out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  }
+  tt = out[PART_LEN].real + uReal[PART_LEN];
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  tt = out[PART_LEN].imag;
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc
new file mode 100644
index 0000000000..584110d3af
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_core_neon.cc
@@ -0,0 +1,206 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "common_audio/signal_processing/include/real_fft.h"
+#include "modules/audio_processing/aecm/aecm_core.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// TODO(kma): Re-write the corresponding assembly file, the offset
+// generating script and makefile, to replace these C functions.
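+
+// A quick reference for the NEON intrinsics used below (illustrative):
+//   vmull_u16(a, b)       widens u16x4 * u16x4 into u32x4 products;
+//   vmlal_u16(acc, a, b)  accumulates them: acc[k] += (uint32_t)a[k] * b[k];
+//   AddLanes(p, v)        (defined next) stores the horizontal sum of the
+//                         four u32 lanes, e.g. v = {1, 2, 3, 4} gives *p == 10.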
+ +static inline void AddLanes(uint32_t* ptr, uint32x4_t v) { +#if defined(WEBRTC_ARCH_ARM64) + *(ptr) = vaddvq_u32(v); +#else + uint32x2_t tmp_v; + tmp_v = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + tmp_v = vpadd_u32(tmp_v, tmp_v); + *(ptr) = vget_lane_u32(tmp_v, 0); +#endif +} + +} // namespace + +void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt_p = aecm->channelAdapt16; + int32_t* echo_est_p = echo_est; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + const uint16_t* far_spectrum_p = far_spectrum; + int16x8_t store_v, adapt_v; + uint16x8_t spectrum_v; + uint32x4_t echo_est_v_low, echo_est_v_high; + uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v; + + far_energy_v = vdupq_n_u32(0); + echo_adapt_v = vdupq_n_u32(0); + echo_stored_v = vdupq_n_u32(0); + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. + // The C code: + // for (i = 0; i < PART_LEN1; i++) { + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + // (*far_energy) += (uint32_t)(far_spectrum[i]); + // *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i]; + // (*echo_energy_stored) += (uint32_t)echo_est[i]; + // } + while (start_stored_p < end_stored_p) { + spectrum_v = vld1q_u16(far_spectrum_p); + adapt_v = vld1q_s16(start_adapt_p); + store_v = vld1q_s16(start_stored_p); + + far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v)); + far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v)); + + echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)), + vget_low_u16(spectrum_v)); + echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)), + vget_high_u16(spectrum_v)); + vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low)); + vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high)); + + echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v); + echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v); + + echo_adapt_v = + vmlal_u16(echo_adapt_v, vreinterpret_u16_s16(vget_low_s16(adapt_v)), + vget_low_u16(spectrum_v)); + echo_adapt_v = + vmlal_u16(echo_adapt_v, vreinterpret_u16_s16(vget_high_s16(adapt_v)), + vget_high_u16(spectrum_v)); + + start_stored_p += 8; + start_adapt_p += 8; + far_spectrum_p += 8; + echo_est_p += 8; + } + + AddLanes(far_energy, far_energy_v); + AddLanes(echo_energy_stored, echo_stored_v); + AddLanes(echo_energy_adapt, echo_adapt_v); + + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + *echo_energy_stored += (uint32_t)echo_est[PART_LEN]; + *far_energy += (uint32_t)far_spectrum[PART_LEN]; + *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; +} + +void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + RTC_DCHECK_EQ(0, (uintptr_t)echo_est % 32); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelStored % 16); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelAdapt16 % 16); + + // This is C code of following optimized code. + // During startup we store the channel every block. 
+ // memcpy(aecm->channelStored, + // aecm->channelAdapt16, + // sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + // for (i = 0; i < PART_LEN; i += 4) { + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + // echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], + // far_spectrum[i + 1]); + // echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], + // far_spectrum[i + 2]); + // echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], + // far_spectrum[i + 3]); + // } + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + const uint16_t* far_spectrum_p = far_spectrum; + int16_t* start_adapt_p = aecm->channelAdapt16; + int16_t* start_stored_p = aecm->channelStored; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + int32_t* echo_est_p = echo_est; + + uint16x8_t far_spectrum_v; + int16x8_t adapt_v; + uint32x4_t echo_est_v_low, echo_est_v_high; + + while (start_stored_p < end_stored_p) { + far_spectrum_v = vld1q_u16(far_spectrum_p); + adapt_v = vld1q_s16(start_adapt_p); + + vst1q_s16(start_stored_p, adapt_v); + + echo_est_v_low = vmull_u16(vget_low_u16(far_spectrum_v), + vget_low_u16(vreinterpretq_u16_s16(adapt_v))); + echo_est_v_high = vmull_u16(vget_high_u16(far_spectrum_v), + vget_high_u16(vreinterpretq_u16_s16(adapt_v))); + + vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low)); + vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high)); + + far_spectrum_p += 8; + start_adapt_p += 8; + start_stored_p += 8; + echo_est_p += 8; + } + aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN]; + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); +} + +void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) { + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelStored % 16); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelAdapt16 % 16); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelAdapt32 % 32); + + // The C code of following optimized code. + // for (i = 0; i < PART_LEN1; i++) { + // aecm->channelAdapt16[i] = aecm->channelStored[i]; + // aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( + // (int32_t)aecm->channelStored[i], 16); + // } + + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt16_p = aecm->channelAdapt16; + int32_t* start_adapt32_p = aecm->channelAdapt32; + const int16_t* end_stored_p = start_stored_p + PART_LEN; + + int16x8_t stored_v; + int32x4_t adapt32_v_low, adapt32_v_high; + + while (start_stored_p < end_stored_p) { + stored_v = vld1q_s16(start_stored_p); + vst1q_s16(start_adapt16_p, stored_v); + + adapt32_v_low = vshll_n_s16(vget_low_s16(stored_v), 16); + adapt32_v_high = vshll_n_s16(vget_high_s16(stored_v), 16); + + vst1q_s32(start_adapt32_p, adapt32_v_low); + vst1q_s32(start_adapt32_p + 4, adapt32_v_high); + + start_stored_p += 8; + start_adapt16_p += 8; + start_adapt32_p += 8; + } + aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN]; + aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/aecm_defines.h b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_defines.h new file mode 100644 index 0000000000..5805549e2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aecm/aecm_defines.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_ +#define MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_ + +#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */ + +/* Algorithm parameters */ +#define FRAME_LEN 80 /* Total frame length, 10 ms. */ + +#define PART_LEN 64 /* Length of partition. */ +#define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */ + +#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */ +#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */ +#define PART_LEN4 (PART_LEN << 2) /* Length of partition * 4. */ +#define FAR_BUF_LEN PART_LEN4 /* Length of buffers. */ +#define MAX_DELAY 100 + +/* Counter parameters */ +#define CONV_LEN 512 /* Convergence length used at startup. */ +#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */ + +/* Energy parameters */ +#define MAX_BUF_LEN 64 /* History length of energy signals. */ +#define FAR_ENERGY_MIN 1025 /* Lowest Far energy level: At least 2 */ + /* in energy. */ +#define FAR_ENERGY_DIFF 929 /* Allowed difference between max */ + /* and min. */ +#define ENERGY_DEV_OFFSET 0 /* The energy error offset in Q8. */ +#define ENERGY_DEV_TOL 400 /* The energy estimation tolerance (Q8). */ +#define FAR_ENERGY_VAD_REGION 230 /* Far VAD tolerance region. */ + +/* Stepsize parameters */ +#define MU_MIN 10 /* Min stepsize 2^-MU_MIN (far end energy */ + /* dependent). */ +#define MU_MAX 1 /* Max stepsize 2^-MU_MAX (far end energy */ + /* dependent). */ +#define MU_DIFF 9 /* MU_MIN - MU_MAX */ + +/* Channel parameters */ +#define MIN_MSE_COUNT 20 /* Min number of consecutive blocks with enough */ + /* far end energy to compare channel estimates. */ +#define MIN_MSE_DIFF 29 /* The ratio between adapted and stored channel to */ + /* accept a new storage (0.8 in Q-MSE_RESOLUTION). */ +#define MSE_RESOLUTION 5 /* MSE parameter resolution. */ +#define RESOLUTION_CHANNEL16 12 /* W16 Channel in Q-RESOLUTION_CHANNEL16. */ +#define RESOLUTION_CHANNEL32 28 /* W32 Channel in Q-RESOLUTION_CHANNEL. */ +#define CHANNEL_VAD 16 /* Minimum energy in frequency band */ + /* to update channel. */ + +/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */ +#define RESOLUTION_SUPGAIN 8 /* Channel in Q-(RESOLUTION_SUPGAIN). */ +#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) /* Default. */ +#define SUPGAIN_ERROR_PARAM_A 3072 /* Estimation error parameter */ + /* (Maximum gain) (8 in Q8). */ +#define SUPGAIN_ERROR_PARAM_B 1536 /* Estimation error parameter */ + /* (Gain before going down). */ +#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT /* Estimation error parameter */ +/* (Should be the same as Default) (1 in Q8). */ +#define SUPGAIN_EPC_DT 200 /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */ + +/* Defines for "check delay estimation" */ +#define CORR_WIDTH 31 /* Number of samples to correlate over. */ +#define CORR_MAX 16 /* Maximum correlation offset. */ +#define CORR_MAX_BUF 63 +#define CORR_DEV 4 +#define CORR_MAX_LEVEL 20 +#define CORR_MAX_LOW 4 +#define CORR_BUF_LEN (CORR_MAX << 1) + 1 +/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. 
*/
+
+#define ONE_Q14 (1 << 14)
+
+/* NLP defines */
+#define NLP_COMP_LOW 3277     /* 0.2 in Q14 */
+#define NLP_COMP_HIGH ONE_Q14 /* 1 in Q14 */
+
+#endif
diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc b/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc
new file mode 100644
index 0000000000..14522c0f1d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.cc
@@ -0,0 +1,599 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aecm/echo_control_mobile.h"
+
+#ifdef AEC_DEBUG
+#include <stdio.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+
+extern "C" {
+#include "common_audio/ring_buffer.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_processing/aecm/aecm_defines.h"
+}
+#include "modules/audio_processing/aecm/aecm_core.h"
+
+namespace webrtc {
+
+namespace {
+
+#define BUF_SIZE_FRAMES 50  // buffer size (frames)
+// Maximum length of resampled signal. Must be an integer multiple of frames
+// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
+// The factor of 2 handles wb, and the + 1 is as a safety margin
+#define MAX_RESAMP_LEN (5 * FRAME_LEN)
+
+static const size_t kBufSizeSamp =
+    BUF_SIZE_FRAMES * FRAME_LEN;  // buffer size (samples)
+static const int kSampMsNb = 8;   // samples per ms in nb
+// Target suppression levels for nlp modes
+// log{0.001, 0.00001, 0.00000001}
+static const int kInitCheck = 42;
+
+typedef struct {
+  int sampFreq;
+  int scSampFreq;
+  short bufSizeStart;
+  int knownDelay;
+
+  // Stores the last frame added to the farend buffer
+  short farendOld[2][FRAME_LEN];
+  short initFlag;  // indicates if AEC has been initialized
+
+  // Variables used for averaging far end buffer size
+  short counter;
+  short sum;
+  short firstVal;
+  short checkBufSizeCtr;
+
+  // Variables used for delay shifts
+  short msInSndCardBuf;
+  short filtDelay;
+  int timeForDelayChange;
+  int ECstartup;
+  int checkBuffSize;
+  int delayChange;
+  short lastDelayDiff;
+
+  int16_t echoMode;
+
+#ifdef AEC_DEBUG
+  FILE* bufFile;
+  FILE* delayFile;
+  FILE* preCompFile;
+  FILE* postCompFile;
+#endif  // AEC_DEBUG
+  // Structures
+  RingBuffer* farendBuf;
+
+  AecmCore* aecmCore;
+} AecMobile;
+
+}  // namespace
+
+// Estimates delay to set the position of the farend buffer read pointer
+// (controlled by knownDelay)
+static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf);
+
+// Stuffs the farend buffer if the estimated delay is too large
+static int WebRtcAecm_DelayComp(AecMobile* aecm);
+
+void* WebRtcAecm_Create() {
+  // Allocate zero-filled memory.
+  AecMobile* aecm = static_cast<AecMobile*>(calloc(1, sizeof(AecMobile)));
+
+  aecm->aecmCore = WebRtcAecm_CreateCore();
+  if (!aecm->aecmCore) {
+    WebRtcAecm_Free(aecm);
+    return NULL;
+  }
+
+  aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp, sizeof(int16_t));
+  if (!aecm->farendBuf) {
+    WebRtcAecm_Free(aecm);
+    return NULL;
+  }
+
+#ifdef AEC_DEBUG
+  aecm->aecmCore->farFile = fopen("aecFar.pcm", "wb");
+  aecm->aecmCore->nearFile = fopen("aecNear.pcm", "wb");
+  aecm->aecmCore->outFile = fopen("aecOut.pcm", "wb");
+  // aecm->aecmCore->outLpFile = fopen("aecOutLp.pcm","wb");
+
+  aecm->bufFile = fopen("aecBuf.dat", "wb");
+  aecm->delayFile = fopen("aecDelay.dat", "wb");
+  aecm->preCompFile = fopen("preComp.pcm", "wb");
+  aecm->postCompFile = fopen("postComp.pcm", "wb");
+#endif  // AEC_DEBUG
+  return aecm;
+}
+
+void WebRtcAecm_Free(void* aecmInst) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+
+  if (aecm == NULL) {
+    return;
+  }
+
+#ifdef AEC_DEBUG
+  fclose(aecm->aecmCore->farFile);
+  fclose(aecm->aecmCore->nearFile);
+  fclose(aecm->aecmCore->outFile);
+  // fclose(aecm->aecmCore->outLpFile);
+
+  fclose(aecm->bufFile);
+  fclose(aecm->delayFile);
+  fclose(aecm->preCompFile);
+  fclose(aecm->postCompFile);
+#endif  // AEC_DEBUG
+  WebRtcAecm_FreeCore(aecm->aecmCore);
+  WebRtc_FreeBuffer(aecm->farendBuf);
+  free(aecm);
+}
+
+int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+  AecmConfig aecConfig;
+
+  if (aecm == NULL) {
+    return -1;
+  }
+
+  if (sampFreq != 8000 && sampFreq != 16000) {
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+  aecm->sampFreq = sampFreq;
+
+  // Initialize AECM core
+  if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1) {
+    return AECM_UNSPECIFIED_ERROR;
+  }
+
+  // Initialize farend buffer
+  WebRtc_InitBuffer(aecm->farendBuf);
+
+  aecm->initFlag = kInitCheck;  // indicates that initialization has been done
+
+  aecm->delayChange = 1;
+
+  aecm->sum = 0;
+  aecm->counter = 0;
+  aecm->checkBuffSize = 1;
+  aecm->firstVal = 0;
+
+  aecm->ECstartup = 1;
+  aecm->bufSizeStart = 0;
+  aecm->checkBufSizeCtr = 0;
+  aecm->filtDelay = 0;
+  aecm->timeForDelayChange = 0;
+  aecm->knownDelay = 0;
+  aecm->lastDelayDiff = 0;
+
+  memset(&aecm->farendOld, 0, sizeof(aecm->farendOld));
+
+  // Default settings.
+  aecConfig.cngMode = AecmTrue;
+  aecConfig.echoMode = 3;
+
+  if (WebRtcAecm_set_config(aecm, aecConfig) == -1) {
+    return AECM_UNSPECIFIED_ERROR;
+  }
+
+  return 0;
+}
+
+// Returns any error that is caused when buffering the
+// farend signal.
+int32_t WebRtcAecm_GetBufferFarendError(void* aecmInst,
+                                        const int16_t* farend,
+                                        size_t nrOfSamples) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+
+  if (aecm == NULL)
+    return -1;
+
+  if (farend == NULL)
+    return AECM_NULL_POINTER_ERROR;
+
+  if (aecm->initFlag != kInitCheck)
+    return AECM_UNINITIALIZED_ERROR;
+
+  if (nrOfSamples != 80 && nrOfSamples != 160)
+    return AECM_BAD_PARAMETER_ERROR;
+
+  return 0;
+}
+
+int32_t WebRtcAecm_BufferFarend(void* aecmInst,
+                                const int16_t* farend,
+                                size_t nrOfSamples) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+
+  const int32_t err =
+      WebRtcAecm_GetBufferFarendError(aecmInst, farend, nrOfSamples);
+
+  if (err != 0)
+    return err;
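+
+  // For context, a typical calling sequence (illustrative sketch only, error
+  // handling omitted; one 10 ms frame is 80 samples at 8 kHz, 160 at 16 kHz):
+  //
+  //   void* inst = WebRtcAecm_Create();
+  //   WebRtcAecm_Init(inst, 8000);
+  //   while (processing) {
+  //     WebRtcAecm_BufferFarend(inst, farend, 80);
+  //     WebRtcAecm_Process(inst, nearendNoisy, NULL, out, 80, delayMs);
+  //   }
+  //   WebRtcAecm_Free(inst);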
+  // TODO(unknown): Is this really a good idea?
+  if (!aecm->ECstartup) {
+    WebRtcAecm_DelayComp(aecm);
+  }
+
+  WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples);
+
+  return 0;
+}
+
+int32_t WebRtcAecm_Process(void* aecmInst,
+                           const int16_t* nearendNoisy,
+                           const int16_t* nearendClean,
+                           int16_t* out,
+                           size_t nrOfSamples,
+                           int16_t msInSndCardBuf) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+  int32_t retVal = 0;
+  size_t i;
+  short nmbrOfFilledBuffers;
+  size_t nBlocks10ms;
+  size_t nFrames;
+#ifdef AEC_DEBUG
+  short msInAECBuf;
+#endif
+
+  if (aecm == NULL) {
+    return -1;
+  }
+
+  if (nearendNoisy == NULL) {
+    return AECM_NULL_POINTER_ERROR;
+  }
+
+  if (out == NULL) {
+    return AECM_NULL_POINTER_ERROR;
+  }
+
+  if (aecm->initFlag != kInitCheck) {
+    return AECM_UNINITIALIZED_ERROR;
+  }
+
+  if (nrOfSamples != 80 && nrOfSamples != 160) {
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+
+  if (msInSndCardBuf < 0) {
+    msInSndCardBuf = 0;
+    retVal = AECM_BAD_PARAMETER_WARNING;
+  } else if (msInSndCardBuf > 500) {
+    msInSndCardBuf = 500;
+    retVal = AECM_BAD_PARAMETER_WARNING;
+  }
+  msInSndCardBuf += 10;
+  aecm->msInSndCardBuf = msInSndCardBuf;
+
+  nFrames = nrOfSamples / FRAME_LEN;
+  nBlocks10ms = nFrames / aecm->aecmCore->mult;
+
+  if (aecm->ECstartup) {
+    if (nearendClean == NULL) {
+      if (out != nearendNoisy) {
+        memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
+      }
+    } else if (out != nearendClean) {
+      memcpy(out, nearendClean, sizeof(short) * nrOfSamples);
+    }
+
+    nmbrOfFilledBuffers =
+        (short)WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
+    // The AECM is in the start up mode
+    // AECM is disabled until the soundcard buffer and farend buffers are OK
+
+    // Mechanism to ensure that the soundcard buffer is reasonably stable.
+    if (aecm->checkBuffSize) {
+      aecm->checkBufSizeCtr++;
+      // Before we fill up the far end buffer we require the amount of data on
+      // the sound card to be stable (+/-8 ms) compared to the first value. This
+      // comparison is made during the following 4 consecutive frames. If it
+      // seems to be stable then we start to fill up the far end buffer.
+
+      if (aecm->counter == 0) {
+        aecm->firstVal = aecm->msInSndCardBuf;
+        aecm->sum = 0;
+      }
+
+      if (abs(aecm->firstVal - aecm->msInSndCardBuf) <
+          WEBRTC_SPL_MAX(0.2 * aecm->msInSndCardBuf, kSampMsNb)) {
+        aecm->sum += aecm->msInSndCardBuf;
+        aecm->counter++;
+      } else {
+        aecm->counter = 0;
+      }
+
+      if (aecm->counter * nBlocks10ms >= 6) {
+        // The farend buffer size is determined in blocks of 80 samples
+        // Use 75% of the average value of the soundcard buffer
+        aecm->bufSizeStart = WEBRTC_SPL_MIN(
+            (3 * aecm->sum * aecm->aecmCore->mult) / (aecm->counter * 40),
+            BUF_SIZE_FRAMES);
+        // buffersize has now been determined
+        aecm->checkBuffSize = 0;
+      }
+
+      if (aecm->checkBufSizeCtr * nBlocks10ms > 50) {
+        // for really bad sound cards, don't disable the echo canceller for
+        // more than 0.5 sec
+        aecm->bufSizeStart = WEBRTC_SPL_MIN(
+            (3 * aecm->msInSndCardBuf * aecm->aecmCore->mult) / 40,
+            BUF_SIZE_FRAMES);
+        aecm->checkBuffSize = 0;
+      }
+    }
+
+    // if checkBuffSize changed in the if-statement above
+    if (!aecm->checkBuffSize) {
+      // soundcard buffer is now reasonably stable
+      // When the far end buffer is filled with approximately the same amount
+      // of data as the amount on the sound card we end the start up phase and
+      // start to cancel echoes.
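+      // For example (hypothetical numbers): with a soundcard buffer that has
+      // stabilized around 120 ms in wideband (mult == 2), the formula above
+      // gives bufSizeStart = 3 * 120 * 2 / 40 = 18 frames, i.e. cancellation
+      // starts once roughly 90 ms (75% of 120 ms) of far-end audio is queued.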
+ + if (nmbrOfFilledBuffers == aecm->bufSizeStart) { + aecm->ECstartup = 0; // Enable the AECM + } else if (nmbrOfFilledBuffers > aecm->bufSizeStart) { + WebRtc_MoveReadPtr(aecm->farendBuf, + (int)WebRtc_available_read(aecm->farendBuf) - + (int)aecm->bufSizeStart * FRAME_LEN); + aecm->ECstartup = 0; + } + } + + } else { + // AECM is enabled + + // Note only 1 block supported for nb and 2 blocks for wb + for (i = 0; i < nFrames; i++) { + int16_t farend[FRAME_LEN]; + const int16_t* farend_ptr = NULL; + + nmbrOfFilledBuffers = + (short)WebRtc_available_read(aecm->farendBuf) / FRAME_LEN; + + // Check that there is data in the far end buffer + if (nmbrOfFilledBuffers > 0) { + // Get the next 80 samples from the farend buffer + WebRtc_ReadBuffer(aecm->farendBuf, (void**)&farend_ptr, farend, + FRAME_LEN); + + // Always store the last frame for use when we run out of data + memcpy(&(aecm->farendOld[i][0]), farend_ptr, FRAME_LEN * sizeof(short)); + } else { + // We have no data so we use the last played frame + memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short)); + farend_ptr = farend; + } + + // Call buffer delay estimator when all data is extracted, + // i,e. i = 0 for NB and i = 1 for WB + if ((i == 0 && aecm->sampFreq == 8000) || + (i == 1 && aecm->sampFreq == 16000)) { + WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf); + } + + // Call the AECM + /*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i], + &out[FRAME_LEN * i], aecm->knownDelay);*/ + if (WebRtcAecm_ProcessFrame( + aecm->aecmCore, farend_ptr, &nearendNoisy[FRAME_LEN * i], + (nearendClean ? &nearendClean[FRAME_LEN * i] : NULL), + &out[FRAME_LEN * i]) == -1) + return -1; + } + } + +#ifdef AEC_DEBUG + msInAECBuf = (short)WebRtc_available_read(aecm->farendBuf) / + (kSampMsNb * aecm->aecmCore->mult); + fwrite(&msInAECBuf, 2, 1, aecm->bufFile); + fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile); +#endif + + return retVal; +} + +int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config) { + AecMobile* aecm = static_cast(aecmInst); + + if (aecm == NULL) { + return -1; + } + + if (aecm->initFlag != kInitCheck) { + return AECM_UNINITIALIZED_ERROR; + } + + if (config.cngMode != AecmFalse && config.cngMode != AecmTrue) { + return AECM_BAD_PARAMETER_ERROR; + } + aecm->aecmCore->cngMode = config.cngMode; + + if (config.echoMode < 0 || config.echoMode > 4) { + return AECM_BAD_PARAMETER_ERROR; + } + aecm->echoMode = config.echoMode; + + if (aecm->echoMode == 0) { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 3; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 3; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 3; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 3; + aecm->aecmCore->supGainErrParamDiffAB = + (SUPGAIN_ERROR_PARAM_A >> 3) - (SUPGAIN_ERROR_PARAM_B >> 3); + aecm->aecmCore->supGainErrParamDiffBD = + (SUPGAIN_ERROR_PARAM_B >> 3) - (SUPGAIN_ERROR_PARAM_D >> 3); + } else if (aecm->echoMode == 1) { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 2; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 2; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 2; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 2; + aecm->aecmCore->supGainErrParamDiffAB = + (SUPGAIN_ERROR_PARAM_A >> 2) - (SUPGAIN_ERROR_PARAM_B >> 2); + aecm->aecmCore->supGainErrParamDiffBD = + (SUPGAIN_ERROR_PARAM_B >> 2) - (SUPGAIN_ERROR_PARAM_D >> 2); + } else if (aecm->echoMode == 2) { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 1; + aecm->aecmCore->supGainOld 
+    aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 1;
+    aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 1;
+    aecm->aecmCore->supGainErrParamDiffAB =
+        (SUPGAIN_ERROR_PARAM_A >> 1) - (SUPGAIN_ERROR_PARAM_B >> 1);
+    aecm->aecmCore->supGainErrParamDiffBD =
+        (SUPGAIN_ERROR_PARAM_B >> 1) - (SUPGAIN_ERROR_PARAM_D >> 1);
+  } else if (aecm->echoMode == 3) {
+    aecm->aecmCore->supGain = SUPGAIN_DEFAULT;
+    aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT;
+    aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A;
+    aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D;
+    aecm->aecmCore->supGainErrParamDiffAB =
+        SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B;
+    aecm->aecmCore->supGainErrParamDiffBD =
+        SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D;
+  } else if (aecm->echoMode == 4) {
+    aecm->aecmCore->supGain = SUPGAIN_DEFAULT << 1;
+    aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT << 1;
+    aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A << 1;
+    aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D << 1;
+    aecm->aecmCore->supGainErrParamDiffAB =
+        (SUPGAIN_ERROR_PARAM_A << 1) - (SUPGAIN_ERROR_PARAM_B << 1);
+    aecm->aecmCore->supGainErrParamDiffBD =
+        (SUPGAIN_ERROR_PARAM_B << 1) - (SUPGAIN_ERROR_PARAM_D << 1);
+  }
+
+  return 0;
+}
+
+int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
+                                const void* echo_path,
+                                size_t size_bytes) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+  const int16_t* echo_path_ptr = static_cast<const int16_t*>(echo_path);
+
+  if (aecmInst == NULL) {
+    return -1;
+  }
+  if (echo_path == NULL) {
+    return AECM_NULL_POINTER_ERROR;
+  }
+  if (size_bytes != WebRtcAecm_echo_path_size_bytes()) {
+    // Input channel size does not match the size of AECM
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+  if (aecm->initFlag != kInitCheck) {
+    return AECM_UNINITIALIZED_ERROR;
+  }
+
+  WebRtcAecm_InitEchoPathCore(aecm->aecmCore, echo_path_ptr);
+
+  return 0;
+}
+
+int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
+                               void* echo_path,
+                               size_t size_bytes) {
+  AecMobile* aecm = static_cast<AecMobile*>(aecmInst);
+  int16_t* echo_path_ptr = static_cast<int16_t*>(echo_path);
+
+  if (aecmInst == NULL) {
+    return -1;
+  }
+  if (echo_path == NULL) {
+    return AECM_NULL_POINTER_ERROR;
+  }
+  if (size_bytes != WebRtcAecm_echo_path_size_bytes()) {
+    // Input channel size does not match the size of AECM
+    return AECM_BAD_PARAMETER_ERROR;
+  }
+  if (aecm->initFlag != kInitCheck) {
+    return AECM_UNINITIALIZED_ERROR;
+  }
+
+  memcpy(echo_path_ptr, aecm->aecmCore->channelStored, size_bytes);
+  return 0;
+}
+
+size_t WebRtcAecm_echo_path_size_bytes() {
+  return (PART_LEN1 * sizeof(int16_t));
+}
+
+static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) {
+  short delayNew, nSampSndCard;
+  short nSampFar = (short)WebRtc_available_read(aecm->farendBuf);
+  short diff;
+
+  nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+
+  delayNew = nSampSndCard - nSampFar;
+
+  if (delayNew < FRAME_LEN) {
+    WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN);
+    delayNew += FRAME_LEN;
+  }
+
+  aecm->filtDelay =
+      WEBRTC_SPL_MAX(0, (8 * aecm->filtDelay + 2 * delayNew) / 10);
+
+  diff = aecm->filtDelay - aecm->knownDelay;
+  if (diff > 224) {
+    if (aecm->lastDelayDiff < 96) {
+      aecm->timeForDelayChange = 0;
+    } else {
+      aecm->timeForDelayChange++;
+    }
+  } else if (diff < 96 && aecm->knownDelay > 0) {
+    if (aecm->lastDelayDiff > 224) {
+      aecm->timeForDelayChange = 0;
+    } else {
+      aecm->timeForDelayChange++;
+    }
+  } else {
+    aecm->timeForDelayChange = 0;
+  }
+  aecm->lastDelayDiff = diff;
+
+  if (aecm->timeForDelayChange > 25) {
+    aecm->knownDelay = WEBRTC_SPL_MAX((int)aecm->filtDelay - 160, 0);
+  }
+  return 0;
+}
+
+static int WebRtcAecm_DelayComp(AecMobile* aecm) {
+  int nSampFar = (int)WebRtc_available_read(aecm->farendBuf);
+  int nSampSndCard, delayNew, nSampAdd;
+  const int maxStuffSamp = 10 * FRAME_LEN;
+
+  nSampSndCard = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
+  delayNew = nSampSndCard - nSampFar;
+
+  if (delayNew > FAR_BUF_LEN - FRAME_LEN * aecm->aecmCore->mult) {
+    // The difference of the buffer sizes is larger than the maximum
+    // allowed known delay. Compensate by stuffing the buffer.
+    nSampAdd =
+        (int)(WEBRTC_SPL_MAX(((nSampSndCard >> 1) - nSampFar), FRAME_LEN));
+    nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp);
+
+    WebRtc_MoveReadPtr(aecm->farendBuf, -nSampAdd);
+    aecm->delayChange = 1;  // the delay needs to be updated
+  }
+
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.h b/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.h
new file mode 100644
index 0000000000..ee780524de
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aecm/echo_control_mobile.h
@@ -0,0 +1,209 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_
+#define MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace webrtc {
+
+enum { AecmFalse = 0, AecmTrue };
+
+// Errors
+#define AECM_UNSPECIFIED_ERROR 12000
+#define AECM_UNSUPPORTED_FUNCTION_ERROR 12001
+#define AECM_UNINITIALIZED_ERROR 12002
+#define AECM_NULL_POINTER_ERROR 12003
+#define AECM_BAD_PARAMETER_ERROR 12004
+
+// Warnings
+#define AECM_BAD_PARAMETER_WARNING 12100
+
+typedef struct {
+  int16_t cngMode;   // AECM_FALSE, AECM_TRUE (default)
+  int16_t echoMode;  // 0, 1, 2, 3 (default), 4
+} AecmConfig;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Allocates the memory needed by the AECM. The memory needs to be
+ * initialized separately using the WebRtcAecm_Init() function.
+ * Returns a pointer to the instance, or a nullptr on failure.
+ */
+void* WebRtcAecm_Create();
+
+/*
+ * This function releases the memory allocated by WebRtcAecm_Create()
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void* aecmInst               Pointer to the AECM instance
+ */
+void WebRtcAecm_Free(void* aecmInst);
+
+/*
+ * Initializes an AECM instance.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void* aecmInst               Pointer to the AECM instance
+ * int32_t sampFreq             Sampling frequency of data
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t return               0: OK
+ *                              12000-12004, 12100: error/warning
+ */
+int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq);
+
+/*
+ * Inserts an 80 or 160 sample block of data into the farend buffer.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void* aecmInst               Pointer to the AECM instance
+ * int16_t* farend              In buffer containing one frame of
+ *                              farend signal
+ * size_t nrOfSamples           Number of samples in farend buffer
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t return               0: OK
+ *                              12000-12004, 12100: error/warning
+ */
+int32_t WebRtcAecm_BufferFarend(void* aecmInst,
+                                const int16_t* farend,
+                                size_t nrOfSamples);
+
+/*
+ * Reports any errors that would arise when buffering a farend buffer.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void* aecmInst               Pointer to the AECM instance
+ * int16_t* farend              In buffer containing one frame of
+ *                              farend signal
+ * size_t nrOfSamples           Number of samples in farend buffer
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t return               0: OK
+ *                              12000-12004, 12100: error/warning
+ */
+int32_t WebRtcAecm_GetBufferFarendError(void* aecmInst,
+                                        const int16_t* farend,
+                                        size_t nrOfSamples);
+
+/*
+ * Runs the AECM on an 80 or 160 sample block of data.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void* aecmInst               Pointer to the AECM instance
+ * int16_t* nearendNoisy        In buffer containing one frame of
+ *                              reference nearend+echo signal. If
+ *                              noise reduction is active, provide
+ *                              the noisy signal here.
+ * int16_t* nearendClean        In buffer containing one frame of
+ *                              nearend+echo signal. If noise
+ *                              reduction is active, provide the
+ *                              clean signal here. Otherwise pass a
+ *                              NULL pointer.
+ * size_t nrOfSamples           Number of samples in nearend buffer
+ * int16_t msInSndCardBuf       Delay estimate for sound card and
+ *                              system buffers
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int16_t* out                 Out buffer, one frame of processed nearend
+ * int32_t return               0: OK
+ *                              12000-12004, 12100: error/warning
+ */
+int32_t WebRtcAecm_Process(void* aecmInst,
+                           const int16_t* nearendNoisy,
+                           const int16_t* nearendClean,
+                           int16_t* out,
+                           size_t nrOfSamples,
+                           int16_t msInSndCardBuf);
+
+/*
+ * This function enables the user to set certain parameters on-the-fly
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void* aecmInst               Pointer to the AECM instance
+ * AecmConfig config            Config instance that contains all
+ *                              properties to be set
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t return               0: OK
+ *                              12000-12004, 12100: error/warning
+ */
+int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config);
+
+/*
+ * This function enables the user to set the echo path on-the-fly.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void* aecmInst               Pointer to the AECM instance
+ * void* echo_path              Pointer to the echo path to be set
+ * size_t size_bytes            Size in bytes of the echo path
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t return               0: OK
+ *                              12000-12004, 12100: error/warning
+ */
+int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
+                                const void* echo_path,
+                                size_t size_bytes);
+
+/*
+ * This function enables the user to get the currently used echo path
+ * on-the-fly
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void* aecmInst               Pointer to the AECM instance
+ * void* echo_path              Pointer to echo path
+ * size_t size_bytes            Size in bytes of the echo path
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t return               0: OK
+ *                              12000-12004, 12100: error/warning
+ */
+int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
+                               void* echo_path,
+                               size_t size_bytes);
+
+/*
+ * This function enables the user to get the echo path size in bytes
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * size_t return                Size in bytes
+ */
+size_t WebRtcAecm_echo_path_size_bytes();
+
+#ifdef __cplusplus
+}
+#endif
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn
new file mode 100644
index 0000000000..75bef1450f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/BUILD.gn
@@ -0,0 +1,126 @@
+# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+
+rtc_source_set("gain_control_interface") {
+  sources = [ "gain_control.h" ]
+}
+
+rtc_library("agc") {
+  sources = [
+    "agc_manager_direct.cc",
+    "agc_manager_direct.h",
+  ]
+  configs += [ "..:apm_debug_dump" ]
+  deps = [
+    ":gain_control_interface",
+    ":level_estimation",
+    "..:api",
+    "..:apm_logging",
+    "..:audio_buffer",
+    "..:audio_frame_view",
+    "../../../api:array_view",
+    "../../../common_audio",
+    "../../../common_audio:common_audio_c",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:gtest_prod",
+    "../../../rtc_base:logging",
+    "../../../rtc_base:safe_minmax",
+    "../../../system_wrappers:field_trial",
+    "../../../system_wrappers:metrics",
+    "../agc2:clipping_predictor",
+    "../agc2:gain_map",
+    "../agc2:input_volume_stats_reporter",
+    "../vad",
+  ]
+  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+}
+
+rtc_library("level_estimation") {
+  sources = [
+    "agc.cc",
+    "agc.h",
+    "loudness_histogram.cc",
+    "loudness_histogram.h",
+    "utility.cc",
+    "utility.h",
+  ]
+  deps = [
+    "../../../api:array_view",
+    "../../../rtc_base:checks",
+    "../vad",
+  ]
+}
+
+rtc_library("legacy_agc") {
+  visibility = [
+    ":*",
+    "..:*",
+  ]  # Only targets in this file and in
+     # audio_processing can depend on
+     # this.
+
+  sources = [
+    "legacy/analog_agc.cc",
+    "legacy/analog_agc.h",
+    "legacy/digital_agc.cc",
+    "legacy/digital_agc.h",
+    "legacy/gain_control.h",
+  ]
+
+  deps = [
+    "../../../common_audio",
+    "../../../common_audio:common_audio_c",
+    "../../../common_audio/third_party/ooura:fft_size_256",
+    "../../../rtc_base:checks",
+    "../../../system_wrappers",
+  ]
+
+  if (rtc_build_with_neon) {
+    if (target_cpu != "arm64") {
+      # Enable compilation for the NEON instruction set.
+      suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ]
+      cflags = [ "-mfpu=neon" ]
+    }
+  }
+}
+
+if (rtc_include_tests) {
+  rtc_library("agc_unittests") {
+    testonly = true
+    sources = [
+      "agc_manager_direct_unittest.cc",
+      "loudness_histogram_unittest.cc",
+      "mock_agc.h",
+    ]
+    configs += [ "..:apm_debug_dump" ]
+
+    deps = [
+      ":agc",
+      ":gain_control_interface",
+      ":level_estimation",
+      "..:mocks",
+      "../../../api:array_view",
+      "../../../rtc_base:checks",
+      "../../../rtc_base:random",
+      "../../../rtc_base:safe_conversions",
+      "../../../rtc_base:safe_minmax",
+      "../../../rtc_base:stringutils",
+      "../../../system_wrappers:metrics",
+      "../../../test:field_trial",
+      "../../../test:fileutils",
+      "../../../test:test_support",
+      "//testing/gtest",
+    ]
+    absl_deps = [
+      "//third_party/abseil-cpp/absl/strings",
+      "//third_party/abseil-cpp/absl/types:optional",
+    ]
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc.cc b/third_party/libwebrtc/modules/audio_processing/agc/agc.cc
new file mode 100644
index 0000000000..a018ff9f93
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc.cc
@@ -0,0 +1,98 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/agc.h"
+
+#include <cmath>
+#include <cstdlib>
+#include <vector>
+
+#include "modules/audio_processing/agc/loudness_histogram.h"
+#include "modules/audio_processing/agc/utility.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kDefaultLevelDbfs = -18;
+constexpr int kNumAnalysisFrames = 100;
+constexpr double kActivityThreshold = 0.3;
+constexpr int kNum10msFramesInOneSecond = 100;
+constexpr int kMaxSampleRateHz = 384000;
+
+}  // namespace
+
+Agc::Agc()
+    : target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
+      target_level_dbfs_(kDefaultLevelDbfs),
+      histogram_(LoudnessHistogram::Create(kNumAnalysisFrames)),
+      inactive_histogram_(LoudnessHistogram::Create()) {}
+
+Agc::~Agc() = default;
+
+void Agc::Process(rtc::ArrayView<const int16_t> audio) {
+  const int sample_rate_hz = audio.size() * kNum10msFramesInOneSecond;
+  RTC_DCHECK_LE(sample_rate_hz, kMaxSampleRateHz);
+  vad_.ProcessChunk(audio.data(), audio.size(), sample_rate_hz);
+  const std::vector<double>& rms = vad_.chunkwise_rms();
+  const std::vector<double>& probabilities =
+      vad_.chunkwise_voice_probabilities();
+  RTC_DCHECK_EQ(rms.size(), probabilities.size());
+  for (size_t i = 0; i < rms.size(); ++i) {
+    histogram_->Update(rms[i], probabilities[i]);
+  }
+}
+
+bool Agc::GetRmsErrorDb(int* error) {
+  if (!error) {
+    RTC_DCHECK_NOTREACHED();
+    return false;
+  }
+
+  if (histogram_->num_updates() < kNumAnalysisFrames) {
+    // We haven't yet received enough frames.
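+    // (That is, fewer than kNumAnalysisFrames updates of roughly 10 ms each,
+    // or about one second of audio.)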
+    return false;
+  }
+
+  if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) {
+    // We are likely in an inactive segment.
+    return false;
+  }
+
+  double loudness = Linear2Loudness(histogram_->CurrentRms());
+  *error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5);
+  histogram_->Reset();
+  return true;
+}
+
+void Agc::Reset() {
+  histogram_->Reset();
+}
+
+int Agc::set_target_level_dbfs(int level) {
+  // TODO(turajs): just some arbitrary sanity check. We can come up with better
+  // limits. The upper limit should be chosen such that the risk of clipping is
+  // low. The lower limit should not result in a too quiet signal.
+  if (level >= 0 || level <= -100)
+    return -1;
+  target_level_dbfs_ = level;
+  target_level_loudness_ = Dbfs2Loudness(level);
+  return 0;
+}
+
+int Agc::target_level_dbfs() const {
+  return target_level_dbfs_;
+}
+
+float Agc::voice_probability() const {
+  return vad_.last_voice_probability();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc.h b/third_party/libwebrtc/modules/audio_processing/agc/agc.h
new file mode 100644
index 0000000000..da42808225
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc.h
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_H_
+#define MODULES_AUDIO_PROCESSING_AGC_AGC_H_
+
+#include <memory>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+
+namespace webrtc {
+
+class LoudnessHistogram;
+
+class Agc {
+ public:
+  Agc();
+  virtual ~Agc();
+
+  // `audio` must be mono; in a multi-channel stream, provide the first
+  // (usually left) channel.
+  virtual void Process(rtc::ArrayView<const int16_t> audio);
+
+  // Retrieves the difference between the target RMS level and the current
+  // signal RMS level in dB. Returns true if an update is available and false
+  // otherwise, in which case `error` should be ignored and no action taken.
+  virtual bool GetRmsErrorDb(int* error);
+  virtual void Reset();
+
+  virtual int set_target_level_dbfs(int level);
+  virtual int target_level_dbfs() const;
+  virtual float voice_probability() const;
+
+ private:
+  double target_level_loudness_;
+  int target_level_dbfs_;
+  std::unique_ptr<LoudnessHistogram> histogram_;
+  std::unique_ptr<LoudnessHistogram> inactive_histogram_;
+  VoiceActivityDetector vad_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_AGC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build
new file mode 100644
index 0000000000..45e6cad306
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ 
+ "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("agc_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc new file mode 100644 index 0000000000..b8ad4a8bb9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.cc @@ -0,0 +1,713 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc/agc_manager_direct.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "api/array_view.h"
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc/gain_control.h"
+#include "modules/audio_processing/agc2/gain_map_internal.h"
+#include "modules/audio_processing/agc2/input_volume_stats_reporter.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/field_trial.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+
+// Amount of error we tolerate in the microphone level (presumably due to OS
+// quantization) before we assume the user has manually adjusted the
+// microphone.
+constexpr int kLevelQuantizationSlack = 25;
+
+constexpr int kDefaultCompressionGain = 7;
+constexpr int kMaxCompressionGain = 12;
+constexpr int kMinCompressionGain = 2;
+// Controls the rate of compression changes towards the target.
+constexpr float kCompressionGainStep = 0.05f;
+
+constexpr int kMaxMicLevel = 255;
+static_assert(kGainMapSize > kMaxMicLevel, "gain map too small");
+constexpr int kMinMicLevel = 12;
+
+// Prevent very large microphone level changes.
+constexpr int kMaxResidualGainChange = 15;
+
+// Maximum additional gain allowed to compensate for microphone level
+// restrictions from clipping events.
+constexpr int kSurplusCompressionGain = 6;
+
+// Target speech level (dBFS) and speech probability threshold used to compute
+// the RMS error override in `GetSpeechLevelErrorDb()`. These are only used for
+// computing the error override and they are not passed to `agc_`.
+// TODO(webrtc:7494): Move these to a config and pass in the ctor.
+constexpr float kOverrideTargetSpeechLevelDbfs = -18.0f;
+constexpr float kOverrideSpeechProbabilitySilenceThreshold = 0.5f;
+// The minimum number of frames between `UpdateGain()` calls.
+// TODO(webrtc:7494): Move this to a config and pass in the ctor with
+// kOverrideWaitFrames = 100. Default value zero needed for the unit tests.
+constexpr int kOverrideWaitFrames = 0;
+
+using AnalogAgcConfig =
+    AudioProcessing::Config::GainController1::AnalogGainController;
+
+// If the "WebRTC-Audio-2ndAgcMinMicLevelExperiment" field trial is specified,
+// parses it and returns a value between 0 and 255 depending on the field-trial
+// string. Returns an unspecified value if the field trial is not specified, if
+// disabled or if it cannot be parsed. Example:
+// 'WebRTC-Audio-2ndAgcMinMicLevelExperiment/Enabled-80' => returns 80.
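+// For instance, the field-trial string "Enabled-80" parses to 80, which lies
+// in [0, 255] and is returned, whereas "Enabled-300" parses but fails the
+// range check below and is ignored.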
+absl::optional<int> GetMinMicLevelOverride() {
+  constexpr char kMinMicLevelFieldTrial[] =
+      "WebRTC-Audio-2ndAgcMinMicLevelExperiment";
+  if (!webrtc::field_trial::IsEnabled(kMinMicLevelFieldTrial)) {
+    return absl::nullopt;
+  }
+  const auto field_trial_string =
+      webrtc::field_trial::FindFullName(kMinMicLevelFieldTrial);
+  int min_mic_level = -1;
+  sscanf(field_trial_string.c_str(), "Enabled-%d", &min_mic_level);
+  if (min_mic_level >= 0 && min_mic_level <= 255) {
+    return min_mic_level;
+  } else {
+    RTC_LOG(LS_WARNING) << "[agc] Invalid parameter for "
+                        << kMinMicLevelFieldTrial << ", ignored.";
+    return absl::nullopt;
+  }
+}
+
+int LevelFromGainError(int gain_error, int level, int min_mic_level) {
+  RTC_DCHECK_GE(level, 0);
+  RTC_DCHECK_LE(level, kMaxMicLevel);
+  if (gain_error == 0) {
+    return level;
+  }
+
+  int new_level = level;
+  if (gain_error > 0) {
+    while (kGainMap[new_level] - kGainMap[level] < gain_error &&
+           new_level < kMaxMicLevel) {
+      ++new_level;
+    }
+  } else {
+    while (kGainMap[new_level] - kGainMap[level] > gain_error &&
+           new_level > min_mic_level) {
+      --new_level;
+    }
+  }
+  return new_level;
+}
+
+// Returns the proportion of samples in the buffer which are at full-scale
+// (and presumably clipped).
+float ComputeClippedRatio(const float* const* audio,
+                          size_t num_channels,
+                          size_t samples_per_channel) {
+  RTC_DCHECK_GT(samples_per_channel, 0);
+  int num_clipped = 0;
+  for (size_t ch = 0; ch < num_channels; ++ch) {
+    int num_clipped_in_ch = 0;
+    for (size_t i = 0; i < samples_per_channel; ++i) {
+      RTC_DCHECK(audio[ch]);
+      if (audio[ch][i] >= 32767.0f || audio[ch][i] <= -32768.0f) {
+        ++num_clipped_in_ch;
+      }
+    }
+    num_clipped = std::max(num_clipped, num_clipped_in_ch);
+  }
+  return static_cast<float>(num_clipped) / (samples_per_channel);
+}
+
+void LogClippingMetrics(int clipping_rate) {
+  RTC_LOG(LS_INFO) << "Input clipping rate: " << clipping_rate << "%";
+  RTC_HISTOGRAM_COUNTS_LINEAR(/*name=*/"WebRTC.Audio.Agc.InputClippingRate",
+                              /*sample=*/clipping_rate, /*min=*/0, /*max=*/100,
+                              /*bucket_count=*/50);
+}
+
+// Computes the speech level error in dB. `speech_level_dbfs` is required to be
+// in the range [-90.0f, 30.0f] and `speech_probability` in the range
+// [0.0f, 1.0f].
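+// For instance, with the target at kOverrideTargetSpeechLevelDbfs (-18 dBFS),
+// a speech level of -30 dBFS and a speech probability above the silence
+// threshold yield an error of round(-18 - (-30)) = 12 dB, i.e. a recommended
+// gain increase of 12 dB.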
+int GetSpeechLevelErrorDb(float speech_level_dbfs, float speech_probability) {
+  constexpr float kMinSpeechLevelDbfs = -90.0f;
+  constexpr float kMaxSpeechLevelDbfs = 30.0f;
+  RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs);
+  RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs);
+  RTC_DCHECK_GE(speech_probability, 0.0f);
+  RTC_DCHECK_LE(speech_probability, 1.0f);
+
+  if (speech_probability < kOverrideSpeechProbabilitySilenceThreshold) {
+    return 0;
+  }
+
+  const float speech_level = rtc::SafeClamp(
+      speech_level_dbfs, kMinSpeechLevelDbfs, kMaxSpeechLevelDbfs);
+
+  return std::round(kOverrideTargetSpeechLevelDbfs - speech_level);
+}
+
+}  // namespace
+
+MonoAgc::MonoAgc(ApmDataDumper* data_dumper,
+                 int clipped_level_min,
+                 bool disable_digital_adaptive,
+                 int min_mic_level)
+    : min_mic_level_(min_mic_level),
+      disable_digital_adaptive_(disable_digital_adaptive),
+      agc_(std::make_unique<Agc>()),
+      max_level_(kMaxMicLevel),
+      max_compression_gain_(kMaxCompressionGain),
+      target_compression_(kDefaultCompressionGain),
+      compression_(target_compression_),
+      compression_accumulator_(compression_),
+      clipped_level_min_(clipped_level_min) {}
+
+MonoAgc::~MonoAgc() = default;
+
+void MonoAgc::Initialize() {
+  max_level_ = kMaxMicLevel;
+  max_compression_gain_ = kMaxCompressionGain;
+  target_compression_ = disable_digital_adaptive_ ? 0 : kDefaultCompressionGain;
+  compression_ = disable_digital_adaptive_ ? 0 : target_compression_;
+  compression_accumulator_ = compression_;
+  capture_output_used_ = true;
+  check_volume_on_next_process_ = true;
+  frames_since_update_gain_ = 0;
+  is_first_frame_ = true;
+}
+
+void MonoAgc::Process(rtc::ArrayView<const int16_t> audio,
+                      absl::optional<int> rms_error_override) {
+  new_compression_to_set_ = absl::nullopt;
+
+  if (check_volume_on_next_process_) {
+    check_volume_on_next_process_ = false;
+    // We have to wait until the first process call to check the volume,
+    // because Chromium doesn't guarantee it to be valid any earlier.
+    CheckVolumeAndReset();
+  }
+
+  agc_->Process(audio);
+
+  // Always check if `agc_` has a new error available. If yes, `agc_` gets
+  // reset.
+  // TODO(webrtc:7494) Replace the `agc_` call `GetRmsErrorDb()` with `Reset()`
+  // if an error override is used.
+  int rms_error = 0;
+  bool update_gain = agc_->GetRmsErrorDb(&rms_error);
+  if (rms_error_override.has_value()) {
+    if (is_first_frame_ || frames_since_update_gain_ < kOverrideWaitFrames) {
+      update_gain = false;
+    } else {
+      rms_error = *rms_error_override;
+      update_gain = true;
+    }
+  }
+
+  if (update_gain) {
+    UpdateGain(rms_error);
+  }
+
+  if (!disable_digital_adaptive_) {
+    UpdateCompressor();
+  }
+
+  is_first_frame_ = false;
+  if (frames_since_update_gain_ < kOverrideWaitFrames) {
+    ++frames_since_update_gain_;
+  }
+}
+
+void MonoAgc::HandleClipping(int clipped_level_step) {
+  RTC_DCHECK_GT(clipped_level_step, 0);
+  // Always decrease the maximum level, even if the current level is below
+  // threshold.
+  SetMaxLevel(std::max(clipped_level_min_, max_level_ - clipped_level_step));
+  if (log_to_histograms_) {
+    RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed",
+                          level_ - clipped_level_step >= clipped_level_min_);
+  }
+  if (level_ > clipped_level_min_) {
+    // Don't try to adjust the level if we're already below the limit. As
+    // a consequence, if the user has brought the level above the limit, we
+    // will still not react until the postproc updates the level.
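+    // For instance, with clipped_level_min_ = 70, level_ = 100 and
+    // clipped_level_step = 15, the level drops to max(70, 100 - 15) = 85;
+    // repeated clipping events walk it down until it is capped at 70.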
+ SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step)); + // Reset the AGCs for all channels since the level has changed. + agc_->Reset(); + frames_since_update_gain_ = 0; + is_first_frame_ = false; + } +} + +void MonoAgc::SetLevel(int new_level) { + int voe_level = recommended_input_volume_; + if (voe_level == 0) { + RTC_DLOG(LS_INFO) + << "[agc] VolumeCallbacks returned level=0, taking no action."; + return; + } + if (voe_level < 0 || voe_level > kMaxMicLevel) { + RTC_LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" + << voe_level; + return; + } + + // Detect manual input volume adjustments by checking if the current level + // `voe_level` is outside of the `[level_ - kLevelQuantizationSlack, level_ + + // kLevelQuantizationSlack]` range where `level_` is the last input volume + // known by this gain controller. + if (voe_level > level_ + kLevelQuantizationSlack || + voe_level < level_ - kLevelQuantizationSlack) { + RTC_DLOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating " + "stored level from " + << level_ << " to " << voe_level; + level_ = voe_level; + // Always allow the user to increase the volume. + if (level_ > max_level_) { + SetMaxLevel(level_); + } + // Take no action in this case, since we can't be sure when the volume + // was manually adjusted. The compressor will still provide some of the + // desired gain change. + agc_->Reset(); + frames_since_update_gain_ = 0; + is_first_frame_ = false; + return; + } + + new_level = std::min(new_level, max_level_); + if (new_level == level_) { + return; + } + + recommended_input_volume_ = new_level; + RTC_DLOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", level_=" << level_ + << ", new_level=" << new_level; + level_ = new_level; +} + +void MonoAgc::SetMaxLevel(int level) { + RTC_DCHECK_GE(level, clipped_level_min_); + max_level_ = level; + // Scale the `kSurplusCompressionGain` linearly across the restricted + // level range. + max_compression_gain_ = + kMaxCompressionGain + std::floor((1.f * kMaxMicLevel - max_level_) / + (kMaxMicLevel - clipped_level_min_) * + kSurplusCompressionGain + + 0.5f); + RTC_DLOG(LS_INFO) << "[agc] max_level_=" << max_level_ + << ", max_compression_gain_=" << max_compression_gain_; +} + +void MonoAgc::HandleCaptureOutputUsedChange(bool capture_output_used) { + if (capture_output_used_ == capture_output_used) { + return; + } + capture_output_used_ = capture_output_used; + + if (capture_output_used) { + // When we start using the output, we should reset things to be safe. + check_volume_on_next_process_ = true; + } +} + +int MonoAgc::CheckVolumeAndReset() { + int level = recommended_input_volume_; + // Reasons for taking action at startup: + // 1) A person starting a call is expected to be heard. + // 2) Independent of interpretation of `level` == 0 we should raise it so the + // AGC can do its job properly. 
+  if (level == 0 && !startup_) {
+    RTC_DLOG(LS_INFO)
+        << "[agc] VolumeCallbacks returned level=0, taking no action.";
+    return 0;
+  }
+  if (level < 0 || level > kMaxMicLevel) {
+    RTC_LOG(LS_ERROR) << "[agc] VolumeCallbacks returned an invalid level="
+                      << level;
+    return -1;
+  }
+  RTC_DLOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
+
+  if (level < min_mic_level_) {
+    level = min_mic_level_;
+    RTC_DLOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
+    recommended_input_volume_ = level;
+  }
+  agc_->Reset();
+  level_ = level;
+  startup_ = false;
+  frames_since_update_gain_ = 0;
+  is_first_frame_ = true;
+  return 0;
+}
+
+// Distributes the required gain change between the digital compression stage
+// and volume slider. We use the compressor first, providing a slack region
+// around the current slider position to reduce movement.
+//
+// If the slider needs to be moved, we check first if the user has adjusted
+// it, in which case we take no action and cache the updated level.
+void MonoAgc::UpdateGain(int rms_error_db) {
+  int rms_error = rms_error_db;
+
+  // Always reset the counter regardless of whether the gain is changed
+  // or not. This matches the behavior of `agc_` where the histogram is
+  // reset every time an RMS error is successfully read.
+  frames_since_update_gain_ = 0;
+
+  // The compressor will always add at least kMinCompressionGain. In effect,
+  // this adjusts our target gain upward by the same amount and rms_error
+  // needs to reflect that.
+  rms_error += kMinCompressionGain;
+
+  // Handle as much error as possible with the compressor first.
+  int raw_compression =
+      rtc::SafeClamp(rms_error, kMinCompressionGain, max_compression_gain_);
+
+  // Deemphasize the compression gain error. Move halfway between the current
+  // target and the newly received target. This serves to soften perceptible
+  // intra-talkspurt adjustments, at the cost of some adaptation speed.
+  if ((raw_compression == max_compression_gain_ &&
+       target_compression_ == max_compression_gain_ - 1) ||
+      (raw_compression == kMinCompressionGain &&
+       target_compression_ == kMinCompressionGain + 1)) {
+    // Special case to allow the target to reach the endpoints of the
+    // compression range. The deemphasis would otherwise halt it at 1 dB shy.
+    target_compression_ = raw_compression;
+  } else {
+    target_compression_ =
+        (raw_compression - target_compression_) / 2 + target_compression_;
+  }
+
+  // Residual error will be handled by adjusting the volume slider. Use the
+  // raw rather than deemphasized compression here as we would otherwise
+  // shrink the amount of slack the compressor provides.
+  const int residual_gain =
+      rtc::SafeClamp(rms_error - raw_compression, -kMaxResidualGainChange,
+                     kMaxResidualGainChange);
+  RTC_DLOG(LS_INFO) << "[agc] rms_error=" << rms_error
+                    << ", target_compression=" << target_compression_
+                    << ", residual_gain=" << residual_gain;
+  if (residual_gain == 0)
+    return;
+
+  int old_level = level_;
+  SetLevel(LevelFromGainError(residual_gain, level_, min_mic_level_));
+  if (old_level != level_) {
+    // Reset the AGC since the level has changed.
+    agc_->Reset();
+  }
+}
+
+void MonoAgc::UpdateCompressor() {
+  if (compression_ == target_compression_) {
+    return;
+  }
+
+  // Adapt the compression gain slowly towards the target, in order to avoid
+  // highly perceptible changes.
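+  // For instance, with kCompressionGainStep = 0.05 dB per 10 ms frame,
+  // closing a 1 dB gap between `compression_` and `target_compression_`
+  // takes 20 frames, i.e. roughly 200 ms.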
+  if (target_compression_ > compression_) {
+    compression_accumulator_ += kCompressionGainStep;
+  } else {
+    compression_accumulator_ -= kCompressionGainStep;
+  }
+
+  // The compressor accepts integer gains in dB. Adjust the gain when
+  // we've come within half a stepsize of the nearest integer. (We don't
+  // check for equality due to potential floating point imprecision).
+  int new_compression = compression_;
+  int nearest_neighbor = std::floor(compression_accumulator_ + 0.5);
+  if (std::fabs(compression_accumulator_ - nearest_neighbor) <
+      kCompressionGainStep / 2) {
+    new_compression = nearest_neighbor;
+  }
+
+  // Set the new compression gain.
+  if (new_compression != compression_) {
+    compression_ = new_compression;
+    compression_accumulator_ = new_compression;
+    new_compression_to_set_ = compression_;
+  }
+}
+
+std::atomic<int> AgcManagerDirect::instance_counter_(0);
+
+AgcManagerDirect::AgcManagerDirect(
+    const AudioProcessing::Config::GainController1::AnalogGainController&
+        analog_config,
+    Agc* agc)
+    : AgcManagerDirect(/*num_capture_channels=*/1, analog_config) {
+  RTC_DCHECK(channel_agcs_[0]);
+  RTC_DCHECK(agc);
+  channel_agcs_[0]->set_agc(agc);
+}
+
+AgcManagerDirect::AgcManagerDirect(int num_capture_channels,
+                                   const AnalogAgcConfig& analog_config)
+    : analog_controller_enabled_(analog_config.enabled),
+      min_mic_level_override_(GetMinMicLevelOverride()),
+      data_dumper_(new ApmDataDumper(instance_counter_.fetch_add(1) + 1)),
+      num_capture_channels_(num_capture_channels),
+      disable_digital_adaptive_(!analog_config.enable_digital_adaptive),
+      frames_since_clipped_(analog_config.clipped_wait_frames),
+      capture_output_used_(true),
+      clipped_level_step_(analog_config.clipped_level_step),
+      clipped_ratio_threshold_(analog_config.clipped_ratio_threshold),
+      clipped_wait_frames_(analog_config.clipped_wait_frames),
+      channel_agcs_(num_capture_channels),
+      new_compressions_to_set_(num_capture_channels),
+      clipping_predictor_(
+          CreateClippingPredictor(num_capture_channels,
+                                  analog_config.clipping_predictor)),
+      use_clipping_predictor_step_(
+          !!clipping_predictor_ &&
+          analog_config.clipping_predictor.use_predicted_step),
+      clipping_rate_log_(0.0f),
+      clipping_rate_log_counter_(0) {
+  RTC_LOG(LS_INFO) << "[agc] analog controller enabled: "
+                   << (analog_controller_enabled_ ? "yes" : "no");
+  const int min_mic_level = min_mic_level_override_.value_or(kMinMicLevel);
+  RTC_LOG(LS_INFO) << "[agc] Min mic level: " << min_mic_level
+                   << " (overridden: "
+                   << (min_mic_level_override_.has_value() ? "yes" : "no")
+                   << ")";
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    ApmDataDumper* data_dumper_ch = ch == 0 ? data_dumper_.get() : nullptr;
+
+    channel_agcs_[ch] = std::make_unique<MonoAgc>(
+        data_dumper_ch, analog_config.clipped_level_min,
+        disable_digital_adaptive_, min_mic_level);
+  }
+  RTC_DCHECK(!channel_agcs_.empty());
+  RTC_DCHECK_GT(clipped_level_step_, 0);
+  RTC_DCHECK_LE(clipped_level_step_, 255);
+  RTC_DCHECK_GT(clipped_ratio_threshold_, 0.0f);
+  RTC_DCHECK_LT(clipped_ratio_threshold_, 1.0f);
+  RTC_DCHECK_GT(clipped_wait_frames_, 0);
+  channel_agcs_[0]->ActivateLogging();
+}
+
+AgcManagerDirect::~AgcManagerDirect() {}
+
+void AgcManagerDirect::Initialize() {
+  RTC_DLOG(LS_INFO) << "AgcManagerDirect::Initialize";
+  data_dumper_->InitiateNewSetOfRecordings();
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    channel_agcs_[ch]->Initialize();
+  }
+  capture_output_used_ = true;
+
+  AggregateChannelLevels();
+  clipping_rate_log_ = 0.0f;
+  clipping_rate_log_counter_ = 0;
+}
+
+void AgcManagerDirect::SetupDigitalGainControl(
+    GainControl& gain_control) const {
+  if (gain_control.set_mode(GainControl::kFixedDigital) != 0) {
+    RTC_LOG(LS_ERROR) << "set_mode(GainControl::kFixedDigital) failed.";
+  }
+  const int target_level_dbfs = disable_digital_adaptive_ ? 0 : 2;
+  if (gain_control.set_target_level_dbfs(target_level_dbfs) != 0) {
+    RTC_LOG(LS_ERROR) << "set_target_level_dbfs() failed.";
+  }
+  const int compression_gain_db =
+      disable_digital_adaptive_ ? 0 : kDefaultCompressionGain;
+  if (gain_control.set_compression_gain_db(compression_gain_db) != 0) {
+    RTC_LOG(LS_ERROR) << "set_compression_gain_db() failed.";
+  }
+  const bool enable_limiter = !disable_digital_adaptive_;
+  if (gain_control.enable_limiter(enable_limiter) != 0) {
+    RTC_LOG(LS_ERROR) << "enable_limiter() failed.";
+  }
+}
+
+void AgcManagerDirect::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
+  const float* const* audio = audio_buffer.channels_const();
+  size_t samples_per_channel = audio_buffer.num_frames();
+  RTC_DCHECK(audio);
+
+  AggregateChannelLevels();
+  if (!capture_output_used_) {
+    return;
+  }
+
+  if (!!clipping_predictor_) {
+    AudioFrameView<const float> frame = AudioFrameView<const float>(
+        audio, num_capture_channels_, static_cast<int>(samples_per_channel));
+    clipping_predictor_->Analyze(frame);
+  }
+
+  // Check for clipped samples, as the AGC has difficulty detecting pitch
+  // under clipping distortion. We do this in the preprocessing phase in order
+  // to catch clipped echo as well.
+  //
+  // If we find a sufficiently clipped frame, drop the current microphone
+  // level and enforce a new maximum level, dropped the same amount from the
+  // current maximum. This harsh treatment is an effort to avoid repeated
+  // clipped echo events. As compensation for this restriction, the maximum
+  // compression gain is increased, through SetMaxLevel().
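+  // For instance, if 20% of the samples in the most-clipped channel are at
+  // full scale, `clipped_ratio` below is 0.2, which exceeds the default
+  // `clipped_ratio_threshold_` of 0.1 and counts as a clipping event.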
+  float clipped_ratio =
+      ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel);
+  clipping_rate_log_ = std::max(clipped_ratio, clipping_rate_log_);
+  clipping_rate_log_counter_++;
+  constexpr int kNumFramesIn30Seconds = 3000;
+  if (clipping_rate_log_counter_ == kNumFramesIn30Seconds) {
+    LogClippingMetrics(std::round(100.0f * clipping_rate_log_));
+    clipping_rate_log_ = 0.0f;
+    clipping_rate_log_counter_ = 0;
+  }
+
+  if (frames_since_clipped_ < clipped_wait_frames_) {
+    ++frames_since_clipped_;
+    return;
+  }
+
+  const bool clipping_detected = clipped_ratio > clipped_ratio_threshold_;
+  bool clipping_predicted = false;
+  int predicted_step = 0;
+  if (!!clipping_predictor_) {
+    for (int channel = 0; channel < num_capture_channels_; ++channel) {
+      const auto step = clipping_predictor_->EstimateClippedLevelStep(
+          channel, recommended_input_volume_, clipped_level_step_,
+          channel_agcs_[channel]->min_mic_level(), kMaxMicLevel);
+      if (step.has_value()) {
+        predicted_step = std::max(predicted_step, step.value());
+        clipping_predicted = true;
+      }
+    }
+  }
+  if (clipping_detected) {
+    RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
+                      << clipped_ratio;
+  }
+  int step = clipped_level_step_;
+  if (clipping_predicted) {
+    predicted_step = std::max(predicted_step, clipped_level_step_);
+    RTC_DLOG(LS_INFO) << "[agc] Clipping predicted. step=" << predicted_step;
+    if (use_clipping_predictor_step_) {
+      step = predicted_step;
+    }
+  }
+  if (clipping_detected ||
+      (clipping_predicted && use_clipping_predictor_step_)) {
+    for (auto& state_ch : channel_agcs_) {
+      state_ch->HandleClipping(step);
+    }
+    frames_since_clipped_ = 0;
+    if (!!clipping_predictor_) {
+      clipping_predictor_->Reset();
+    }
+  }
+  AggregateChannelLevels();
+}
+
+void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
+  Process(audio_buffer, /*speech_probability=*/absl::nullopt,
+          /*speech_level_dbfs=*/absl::nullopt);
+}
+
+void AgcManagerDirect::Process(const AudioBuffer& audio_buffer,
+                               absl::optional<float> speech_probability,
+                               absl::optional<float> speech_level_dbfs) {
+  AggregateChannelLevels();
+  const int volume_after_clipping_handling = recommended_input_volume_;
+
+  if (!capture_output_used_) {
+    return;
+  }
+
+  const size_t num_frames_per_band = audio_buffer.num_frames_per_band();
+  absl::optional<int> rms_error_override = absl::nullopt;
+  if (speech_probability.has_value() && speech_level_dbfs.has_value()) {
+    rms_error_override =
+        GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability);
+  }
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> audio_data;
+    int16_t* audio_use = audio_data.data();
+    FloatS16ToS16(audio_buffer.split_bands_const_f(ch)[0], num_frames_per_band,
+                  audio_use);
+    channel_agcs_[ch]->Process({audio_use, num_frames_per_band},
+                               rms_error_override);
+    new_compressions_to_set_[ch] = channel_agcs_[ch]->new_compression();
+  }
+
+  AggregateChannelLevels();
+  if (volume_after_clipping_handling != recommended_input_volume_) {
+    // The recommended input volume was adjusted in order to match the target
+    // level.
+    UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(
+        recommended_input_volume_);
+  }
+}
+
+absl::optional<int> AgcManagerDirect::GetDigitalComressionGain() {
+  return new_compressions_to_set_[channel_controlling_gain_];
+}
+
+void AgcManagerDirect::HandleCaptureOutputUsedChange(bool capture_output_used) {
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    channel_agcs_[ch]->HandleCaptureOutputUsedChange(capture_output_used);
+  }
+  capture_output_used_ = capture_output_used;
+}
+
+float AgcManagerDirect::voice_probability() const {
+  float max_prob = 0.f;
+  for (const auto& state_ch : channel_agcs_) {
+    max_prob = std::max(max_prob, state_ch->voice_probability());
+  }
+
+  return max_prob;
+}
+
+void AgcManagerDirect::set_stream_analog_level(int level) {
+  if (!analog_controller_enabled_) {
+    recommended_input_volume_ = level;
+  }
+
+  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
+    channel_agcs_[ch]->set_stream_analog_level(level);
+  }
+
+  AggregateChannelLevels();
+}
+
+void AgcManagerDirect::AggregateChannelLevels() {
+  int new_recommended_input_volume =
+      channel_agcs_[0]->recommended_analog_level();
+  channel_controlling_gain_ = 0;
+  for (size_t ch = 1; ch < channel_agcs_.size(); ++ch) {
+    int level = channel_agcs_[ch]->recommended_analog_level();
+    if (level < new_recommended_input_volume) {
+      new_recommended_input_volume = level;
+      channel_controlling_gain_ = static_cast<int>(ch);
+    }
+  }
+
+  if (min_mic_level_override_.has_value() && new_recommended_input_volume > 0) {
+    new_recommended_input_volume =
+        std::max(new_recommended_input_volume, *min_mic_level_override_);
+  }
+
+  if (analog_controller_enabled_) {
+    recommended_input_volume_ = new_recommended_input_volume;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h
new file mode 100644
index 0000000000..adb2f5a63f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct.h
@@ -0,0 +1,278 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
+#define MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
+
+#include <atomic>
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc/agc.h"
+#include "modules/audio_processing/agc2/clipping_predictor.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gtest_prod_util.h"
+
+namespace webrtc {
+
+class MonoAgc;
+class GainControl;
+
+// Adaptive Gain Controller (AGC) that controls the input volume and a digital
+// gain. The input volume controller recommends what volume to use, handles
+// volume changes and clipping. In particular, it handles changes triggered by
+// the user (e.g., volume set to zero by a HW mute button). The digital
+// controller chooses and applies the digital compression gain.
+// This class is not thread-safe.
+// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
+// convention.
+class AgcManagerDirect final {
+ public:
+  // Ctor. `num_capture_channels` specifies the number of channels for the
+  // audio passed to `AnalyzePreProcess()` and `Process()`. Clamps
+  // `analog_config.startup_min_level` in the [12, 255] range.
+  AgcManagerDirect(
+      int num_capture_channels,
+      const AudioProcessing::Config::GainController1::AnalogGainController&
+          analog_config);
+
+  ~AgcManagerDirect();
+  AgcManagerDirect(const AgcManagerDirect&) = delete;
+  AgcManagerDirect& operator=(const AgcManagerDirect&) = delete;
+
+  void Initialize();
+
+  // Configures `gain_control` to work as a fixed digital controller so that
+  // the adaptive part is only handled by this gain controller. Must be called
+  // if `gain_control` is also used to avoid the side-effects of running two
+  // AGCs.
+  void SetupDigitalGainControl(GainControl& gain_control) const;
+
+  // Sets the applied input volume.
+  void set_stream_analog_level(int level);
+
+  // TODO(bugs.webrtc.org/7494): Add argument for the applied input volume and
+  // remove `set_stream_analog_level()`.
+  // Analyzes `audio` before `Process()` is called so that the analysis can be
+  // performed before external digital processing operations take place (e.g.,
+  // echo cancellation). The analysis consists of input clipping detection and
+  // prediction (if enabled). Must be called after `set_stream_analog_level()`.
+  void AnalyzePreProcess(const AudioBuffer& audio_buffer);
+
+  // Processes `audio_buffer`. Chooses a digital compression gain and the new
+  // input volume to recommend. Must be called after `AnalyzePreProcess()`. If
+  // `speech_probability` (range [0.0f, 1.0f]) and `speech_level_dbfs` (range
+  // [-90.f, 30.0f]) are given, uses them to override the estimated RMS error.
+  // TODO(webrtc:7494): This signature is needed for testing purposes, unify
+  // the signatures when the clean-up is done.
+  void Process(const AudioBuffer& audio_buffer,
+               absl::optional<float> speech_probability,
+               absl::optional<float> speech_level_dbfs);
+
+  // Processes `audio_buffer`. Chooses a digital compression gain and the new
+  // input volume to recommend. Must be called after `AnalyzePreProcess()`.
+  void Process(const AudioBuffer& audio_buffer);
+
+  // TODO(bugs.webrtc.org/7494): Return recommended input volume and remove
+  // `recommended_analog_level()`.
+  // Returns the recommended input volume. If the input volume controller is
+  // disabled, returns the input volume set via the latest
+  // `set_stream_analog_level()` call. Must be called after
+  // `AnalyzePreProcess()` and `Process()`.
+  int recommended_analog_level() const { return recommended_input_volume_; }
+
+  // Call when the capture stream output has been flagged to be used/not-used.
+  // If unused, the manager disregards all incoming audio.
+  void HandleCaptureOutputUsedChange(bool capture_output_used);
+
+  float voice_probability() const;
+
+  int num_channels() const { return num_capture_channels_; }
+
+  // If available, returns the latest digital compression gain that has been
+  // chosen.
+  absl::optional<int> GetDigitalComressionGain();
+
+  // Returns true if clipping prediction is enabled.
+  bool clipping_predictor_enabled() const { return !!clipping_predictor_; }
+
+  // Returns true if clipping prediction is used to adjust the input volume.
+  bool use_clipping_predictor_step() const {
+    return use_clipping_predictor_step_;
+  }
+
+ private:
+  friend class AgcManagerDirectTestHelper;
+
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest, DisableDigitalDisablesDigital);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentDefault);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentDisabled);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentOutOfRangeAbove);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentOutOfRangeBelow);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentEnabled50);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectTest,
+                           AgcMinMicLevelExperimentEnabledAboveStartupLevel);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           ClippingParametersVerified);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           DisableClippingPredictorDoesNotLowerVolume);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           UsedClippingPredictionsProduceLowerAnalogLevels);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           UnusedClippingPredictionsProduceEqualAnalogLevels);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           EmptyRmsErrorOverrideHasNoEffect);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           NonEmptyRmsErrorOverrideHasEffect);
+
+  // Ctor that creates a single-channel AGC by injecting `agc`.
+  // `agc` will be owned by this class; hence, do not delete it.
+  AgcManagerDirect(
+      const AudioProcessing::Config::GainController1::AnalogGainController&
+          analog_config,
+      Agc* agc);
+
+  void AggregateChannelLevels();
+
+  const bool analog_controller_enabled_;
+
+  const absl::optional<int> min_mic_level_override_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  static std::atomic<int> instance_counter_;
+  const int num_capture_channels_;
+  const bool disable_digital_adaptive_;
+
+  int frames_since_clipped_;
+
+  // TODO(bugs.webrtc.org/7494): Create a separate member for the applied input
+  // volume.
+  // TODO(bugs.webrtc.org/7494): Once
+  // `AudioProcessingImpl::recommended_stream_analog_level()` becomes a trivial
+  // getter, leave uninitialized.
+  // Recommended input volume. After `set_stream_analog_level()` is called it
+  // holds the observed input volume. Possibly updated by `AnalyzePreProcess()`
+  // and `Process()`; after these calls, holds the recommended input volume.
+  int recommended_input_volume_ = 0;
+
+  bool capture_output_used_;
+  int channel_controlling_gain_ = 0;
+
+  const int clipped_level_step_;
+  const float clipped_ratio_threshold_;
+  const int clipped_wait_frames_;
+
+  std::vector<std::unique_ptr<MonoAgc>> channel_agcs_;
+  std::vector<absl::optional<int>> new_compressions_to_set_;
+
+  const std::unique_ptr<ClippingPredictor> clipping_predictor_;
+  const bool use_clipping_predictor_step_;
+  float clipping_rate_log_;
+  int clipping_rate_log_counter_;
+};
+
+// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
+// convention.
+class MonoAgc {
+ public:
+  MonoAgc(ApmDataDumper* data_dumper,
+          int clipped_level_min,
+          bool disable_digital_adaptive,
+          int min_mic_level);
+  ~MonoAgc();
+  MonoAgc(const MonoAgc&) = delete;
+  MonoAgc& operator=(const MonoAgc&) = delete;
+
+  void Initialize();
+  void HandleCaptureOutputUsedChange(bool capture_output_used);
+
+  // Sets the current input volume.
+  void set_stream_analog_level(int level) { recommended_input_volume_ = level; }
+
+  // Lowers the recommended input volume in response to clipping based on the
+  // suggested reduction `clipped_level_step`. Must be called after
+  // `set_stream_analog_level()`.
+  void HandleClipping(int clipped_level_step);
+
+  // Analyzes `audio`, requests the RMS error from AGC, updates the recommended
+  // input volume based on the estimated speech level and, if enabled, updates
+  // the (digital) compression gain to be applied by `agc_`. Must be called
+  // after `HandleClipping()`. If `rms_error_override` has a value, the RMS
+  // error from AGC is overridden by it.
+  void Process(rtc::ArrayView<const int16_t> audio,
+               absl::optional<int> rms_error_override);
+
+  // Returns the recommended input volume. Must be called after `Process()`.
+  int recommended_analog_level() const { return recommended_input_volume_; }
+
+  float voice_probability() const { return agc_->voice_probability(); }
+  void ActivateLogging() { log_to_histograms_ = true; }
+  absl::optional<int> new_compression() const {
+    return new_compression_to_set_;
+  }
+
+  // Only used for testing.
+  void set_agc(Agc* agc) { agc_.reset(agc); }
+  int min_mic_level() const { return min_mic_level_; }
+
+ private:
+  // Sets a new input volume, after first checking that it hasn't been updated
+  // by the user, in which case no action is taken.
+  void SetLevel(int new_level);
+
+  // Set the maximum input volume the AGC is allowed to apply. Also updates the
+  // maximum compression gain to compensate. The volume must be at least
+  // `kClippedLevelMin`.
+  void SetMaxLevel(int level);
+
+  int CheckVolumeAndReset();
+  void UpdateGain(int rms_error_db);
+  void UpdateCompressor();
+
+  const int min_mic_level_;
+  const bool disable_digital_adaptive_;
+  std::unique_ptr<Agc> agc_;
+  int level_ = 0;
+  int max_level_;
+  int max_compression_gain_;
+  int target_compression_;
+  int compression_;
+  float compression_accumulator_;
+  bool capture_output_used_ = true;
+  bool check_volume_on_next_process_ = true;
+  bool startup_ = true;
+
+  // TODO(bugs.webrtc.org/7494): Create a separate member for the applied
+  // input volume.
+  // Recommended input volume. After `set_stream_analog_level()` is
+  // called, it holds the observed applied input volume. Possibly updated by
+  // `HandleClipping()` and `Process()`; after these calls, holds the
+  // recommended input volume.
+  int recommended_input_volume_ = 0;
+
+  absl::optional<int> new_compression_to_set_;
+  bool log_to_histograms_ = false;
+  const int clipped_level_min_;
+
+  // Frames since the last `UpdateGain()` call.
+  int frames_since_update_gain_ = 0;
+  // Set to true for the first frame after startup and reset, otherwise false.
+  bool is_first_frame_ = true;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc
new file mode 100644
index 0000000000..70ac0b5b34
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc
@@ -0,0 +1,2184 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/agc_manager_direct.h"
+
+#include <fstream>
+#include <limits>
+#include <string>
+#include <tuple>
+
+#include "modules/audio_processing/agc/gain_control.h"
+#include "modules/audio_processing/agc/mock_agc.h"
+#include "modules/audio_processing/include/mock_audio_processing.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::DoAll;
+using ::testing::Return;
+using ::testing::SetArgPointee;
+
+namespace webrtc {
+namespace {
+
+constexpr int kSampleRateHz = 32000;
+constexpr int kNumChannels = 1;
+constexpr int kInitialInputVolume = 128;
+constexpr int kClippedMin = 165;  // Arbitrary, but different from the default.
+constexpr float kAboveClippedThreshold = 0.2f;
+constexpr int kMinMicLevel = 12;
+constexpr int kClippedLevelStep = 15;
+constexpr float kClippedRatioThreshold = 0.1f;
+constexpr int kClippedWaitFrames = 300;
+constexpr float kLowSpeechProbability = 0.1f;
+constexpr float kHighSpeechProbability = 0.7f;
+constexpr float kSpeechLevelDbfs = -25.0f;
+
+constexpr float kMinSample = std::numeric_limits<int16_t>::min();
+constexpr float kMaxSample = std::numeric_limits<int16_t>::max();
+
+using AnalogAgcConfig =
+    AudioProcessing::Config::GainController1::AnalogGainController;
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor;
+constexpr AnalogAgcConfig kDefaultAnalogConfig{};
+
+class MockGainControl : public GainControl {
+ public:
+  virtual ~MockGainControl() {}
+  MOCK_METHOD(int, set_stream_analog_level, (int level), (override));
+  MOCK_METHOD(int, stream_analog_level, (), (const, override));
+  MOCK_METHOD(int, set_mode, (Mode mode), (override));
+  MOCK_METHOD(Mode, mode, (), (const, override));
+  MOCK_METHOD(int, set_target_level_dbfs, (int level), (override));
+  MOCK_METHOD(int, target_level_dbfs, (), (const, override));
+  MOCK_METHOD(int, set_compression_gain_db, (int gain), (override));
+  MOCK_METHOD(int, compression_gain_db, (), (const, override));
+  MOCK_METHOD(int, enable_limiter, (bool enable), (override));
+  MOCK_METHOD(bool, is_limiter_enabled, (), (const, override));
+  MOCK_METHOD(int,
+              set_analog_level_limits,
+              (int minimum, int maximum),
+              (override));
+  MOCK_METHOD(int, analog_level_minimum, (), (const, override));
+  MOCK_METHOD(int, analog_level_maximum, (), (const, override));
+  MOCK_METHOD(bool, stream_is_saturated, (), (const, override));
+};
+
+// TODO(bugs.webrtc.org/12874): Remove and use designated initializers once
+// fixed.
+std::unique_ptr<AgcManagerDirect> CreateAgcManagerDirect(
+    int startup_min_volume,
+    int clipped_level_step,
+    float clipped_ratio_threshold,
+    int clipped_wait_frames,
+    const ClippingPredictorConfig& clipping_predictor_config =
+        kDefaultAnalogConfig.clipping_predictor) {
+  AnalogAgcConfig config;
+  config.startup_min_volume = startup_min_volume;
+  config.clipped_level_min = kClippedMin;
+  config.enable_digital_adaptive = false;
+  config.clipped_level_step = clipped_level_step;
+  config.clipped_ratio_threshold = clipped_ratio_threshold;
+  config.clipped_wait_frames = clipped_wait_frames;
+  config.clipping_predictor = clipping_predictor_config;
+  return std::make_unique<AgcManagerDirect>(/*num_capture_channels=*/1,
+                                            config);
+}
+
+// Deprecated.
+// TODO(bugs.webrtc.org/7494): Delete this helper, use
+// `AgcManagerDirectTestHelper::CallAgcSequence()` instead.
+// Calls `AnalyzePreProcess()` on `manager` `num_calls` times. `peak_ratio` is
+// a value in [0, 1] which determines the amplitude of the samples (1 maps to
+// full scale). The first half of the calls is made on frames which are half
+// filled with zeros in order to simulate a signal with different crest
+// factors.
+void CallPreProcessAudioBuffer(int num_calls,
+                               float peak_ratio,
+                               AgcManagerDirect& manager) {
+  RTC_DCHECK_LE(peak_ratio, 1.0f);
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+  const int num_channels = audio_buffer.num_channels();
+  const int num_frames = audio_buffer.num_frames();
+
+  // Make half of the calls with half zeroed frames.
+  for (int ch = 0; ch < num_channels; ++ch) {
+    // 50% of the samples in one frame are zero.
+    for (int i = 0; i < num_frames; i += 2) {
+      audio_buffer.channels()[ch][i] = peak_ratio * 32767.0f;
+      audio_buffer.channels()[ch][i + 1] = 0.0f;
+    }
+  }
+  for (int n = 0; n < num_calls / 2; ++n) {
+    manager.AnalyzePreProcess(audio_buffer);
+  }
+
+  // Make the remaining half of the calls with frames whose samples are all
+  // set.
+  for (int ch = 0; ch < num_channels; ++ch) {
+    for (int i = 0; i < num_frames; ++i) {
+      audio_buffer.channels()[ch][i] = peak_ratio * 32767.0f;
+    }
+  }
+  for (int n = 0; n < num_calls - num_calls / 2; ++n) {
+    manager.AnalyzePreProcess(audio_buffer);
+  }
+}
+
+constexpr char kMinMicLevelFieldTrial[] =
+    "WebRTC-Audio-2ndAgcMinMicLevelExperiment";
+
+std::string GetAgcMinMicLevelExperimentFieldTrial(const std::string& value) {
+  char field_trial_buffer[64];
+  rtc::SimpleStringBuilder builder(field_trial_buffer);
+  builder << kMinMicLevelFieldTrial << "/" << value << "/";
+  return builder.str();
+}
+
+std::string GetAgcMinMicLevelExperimentFieldTrialEnabled(
+    int enabled_value,
+    const std::string& suffix = "") {
+  RTC_DCHECK_GE(enabled_value, 0);
+  RTC_DCHECK_LE(enabled_value, 255);
+  char field_trial_buffer[64];
+  rtc::SimpleStringBuilder builder(field_trial_buffer);
+  builder << kMinMicLevelFieldTrial << "/Enabled-" << enabled_value << suffix
+          << "/";
+  return builder.str();
+}
+
+std::string GetAgcMinMicLevelExperimentFieldTrial(
+    absl::optional<int> min_mic_level) {
+  if (min_mic_level.has_value()) {
+    return GetAgcMinMicLevelExperimentFieldTrialEnabled(*min_mic_level);
+  }
+  return GetAgcMinMicLevelExperimentFieldTrial("Disabled");
+}
+
+// (Over)writes `samples_value` for the samples in `audio_buffer`.
+// When `clipped_ratio`, a value in [0, 1], is greater than 0, the
+// corresponding fraction of the frame is set to a full scale value to
+// simulate clipping.
+void WriteAudioBufferSamples(float samples_value,
+                             float clipped_ratio,
+                             AudioBuffer& audio_buffer) {
+  RTC_DCHECK_GE(samples_value, kMinSample);
+  RTC_DCHECK_LE(samples_value, kMaxSample);
+  RTC_DCHECK_GE(clipped_ratio, 0.0f);
+  RTC_DCHECK_LE(clipped_ratio, 1.0f);
+  int num_channels = audio_buffer.num_channels();
+  int num_samples = audio_buffer.num_frames();
+  int num_clipping_samples = clipped_ratio * num_samples;
+  for (int ch = 0; ch < num_channels; ++ch) {
+    int i = 0;
+    for (; i < num_clipping_samples; ++i) {
+      audio_buffer.channels()[ch][i] = 32767.0f;
+    }
+    for (; i < num_samples; ++i) {
+      audio_buffer.channels()[ch][i] = samples_value;
+    }
+  }
+}
+
+// Deprecated.
+// TODO(bugs.webrtc.org/7494): Delete this helper, use
+// `AgcManagerDirectTestHelper::CallAgcSequence()` instead.
+void CallPreProcessAndProcess(int num_calls,
+                              const AudioBuffer& audio_buffer,
+                              absl::optional<float> speech_probability_override,
+                              absl::optional<float> speech_level_override,
+                              AgcManagerDirect& manager) {
+  for (int n = 0; n < num_calls; ++n) {
+    manager.AnalyzePreProcess(audio_buffer);
+    manager.Process(audio_buffer, speech_probability_override,
+                    speech_level_override);
+  }
+}
+
+// Reads a given number of 10 ms chunks from a PCM file and feeds them to
+// `AgcManagerDirect`.
+class SpeechSamplesReader {
+ private:
+  // Recording properties.
+  static constexpr int kPcmSampleRateHz = 16000;
+  static constexpr int kPcmNumChannels = 1;
+  static constexpr int kPcmBytesPerSamples = sizeof(int16_t);
+
+ public:
+  SpeechSamplesReader()
+      : is_(test::ResourcePath("audio_processing/agc/agc_audio", "pcm"),
+            std::ios::binary | std::ios::ate),
+        audio_buffer_(kPcmSampleRateHz,
+                      kPcmNumChannels,
+                      kPcmSampleRateHz,
+                      kPcmNumChannels,
+                      kPcmSampleRateHz,
+                      kPcmNumChannels),
+        buffer_(audio_buffer_.num_frames()),
+        buffer_num_bytes_(buffer_.size() * kPcmBytesPerSamples) {
+    RTC_CHECK(is_);
+  }
+
+  // Reads `num_frames` 10 ms frames from the beginning of the PCM file,
+  // applies `gain_db` and feeds the frames into `agc` by calling
+  // `AnalyzePreProcess()` and `Process()` for each frame. Reads the number of
+  // 10 ms frames available in the PCM file if `num_frames` is too large -
+  // i.e., does not loop.
+  void Feed(int num_frames, int gain_db, AgcManagerDirect& agc) {
+    float gain = std::pow(10.0f, gain_db / 20.0f);  // From dB to linear gain.
+    is_.seekg(0, is_.beg);  // Start from the beginning of the PCM file.
+
+    // Read and feed frames.
+    for (int i = 0; i < num_frames; ++i) {
+      is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_);
+      if (is_.gcount() < buffer_num_bytes_) {
+        // EOF reached. Stop.
+        break;
+      }
+      // Apply gain and copy samples into `audio_buffer_`.
+      std::transform(buffer_.begin(), buffer_.end(),
+                     audio_buffer_.channels()[0], [gain](int16_t v) -> float {
+                       return rtc::SafeClamp(static_cast<float>(v) * gain,
+                                             kMinSample, kMaxSample);
+                     });
+
+      agc.AnalyzePreProcess(audio_buffer_);
+      agc.Process(audio_buffer_);
+    }
+  }
+
+  // Reads `num_frames` 10 ms frames from the beginning of the PCM file,
+  // applies `gain_db` and feeds the frames into `agc` by calling
+  // `AnalyzePreProcess()` and `Process()` for each frame. Reads the number of
+  // 10 ms frames available in the PCM file if `num_frames` is too large -
+  // i.e., does not loop. `speech_probability_override` and
+  // `speech_level_override` are passed to `Process()` where they are used to
+  // override the `agc` RMS error if they have a value.
+  void Feed(int num_frames,
+            int gain_db,
+            absl::optional<float> speech_probability_override,
+            absl::optional<float> speech_level_override,
+            AgcManagerDirect& agc) {
+    float gain = std::pow(10.0f, gain_db / 20.0f);  // From dB to linear gain.
+    is_.seekg(0, is_.beg);  // Start from the beginning of the PCM file.
+
+    // Read and feed frames.
+    for (int i = 0; i < num_frames; ++i) {
+      is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_);
+      if (is_.gcount() < buffer_num_bytes_) {
+        // EOF reached. Stop.
+        break;
+      }
+      // Apply gain and copy samples into `audio_buffer_`.
+      std::transform(buffer_.begin(), buffer_.end(),
+                     audio_buffer_.channels()[0], [gain](int16_t v) -> float {
+                       return rtc::SafeClamp(static_cast<float>(v) * gain,
+                                             kMinSample, kMaxSample);
+                     });
+
+      agc.AnalyzePreProcess(audio_buffer_);
+      agc.Process(audio_buffer_, speech_probability_override,
+                  speech_level_override);
+    }
+  }
+
+ private:
+  std::ifstream is_;
+  AudioBuffer audio_buffer_;
+  std::vector<int16_t> buffer_;
+  const std::streamsize buffer_num_bytes_;
+};
+
+}  // namespace
+
+// TODO(bugs.webrtc.org/12874): Use constexpr struct with designated
+// initializers once fixed.
+constexpr AnalogAgcConfig GetAnalogAgcTestConfig() {
+  AnalogAgcConfig config;
+  config.enabled = true;
+  config.startup_min_volume = kInitialInputVolume;
+  config.clipped_level_min = kClippedMin;
+  config.enable_digital_adaptive = true;
+  config.clipped_level_step = kClippedLevelStep;
+  config.clipped_ratio_threshold = kClippedRatioThreshold;
+  config.clipped_wait_frames = kClippedWaitFrames;
+  config.clipping_predictor = kDefaultAnalogConfig.clipping_predictor;
+  return config;
+}
+
+constexpr AnalogAgcConfig GetDisabledAnalogAgcConfig() {
+  AnalogAgcConfig config = GetAnalogAgcTestConfig();
+  config.enabled = false;
+  return config;
+}
+
+// Helper class that provides an `AgcManagerDirect` instance with an injected
+// `Agc` mock, an `AudioBuffer` instance and `CallAgcSequence()`, a helper
+// method that runs the `AgcManagerDirect` instance on the `AudioBuffer` one by
+// sticking to the API contract.
+class AgcManagerDirectTestHelper {
+ public:
+  // Ctor. Initializes `audio_buffer` with zeros.
+  AgcManagerDirectTestHelper()
+      : audio_buffer(kSampleRateHz,
+                     kNumChannels,
+                     kSampleRateHz,
+                     kNumChannels,
+                     kSampleRateHz,
+                     kNumChannels),
+        mock_agc(new ::testing::NiceMock<MockAgc>()),
+        manager(GetAnalogAgcTestConfig(), mock_agc) {
+    manager.Initialize();
+    manager.SetupDigitalGainControl(mock_gain_control);
+    WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f,
+                            audio_buffer);
+  }
+
+  // Calls the sequence of `AgcManagerDirect` methods according to the API
+  // contract, namely:
+  // - Sets the applied input volume;
+  // - Uses `audio_buffer` to call `AnalyzePreProcess()` and `Process()`;
+  // - Sets the digital compression gain, if specified, on the injected
+  //   `mock_agc`. Returns the recommended input volume. The RMS error from
+  //   AGC is replaced by an override value if `speech_probability_override`
+  //   and `speech_level_override` have a value.
+  int CallAgcSequence(int applied_input_volume,
+                      absl::optional<float> speech_probability_override,
+                      absl::optional<float> speech_level_override) {
+    manager.set_stream_analog_level(applied_input_volume);
+    manager.AnalyzePreProcess(audio_buffer);
+    manager.Process(audio_buffer, speech_probability_override,
+                    speech_level_override);
+    absl::optional<int> digital_gain = manager.GetDigitalComressionGain();
+    if (digital_gain) {
+      mock_gain_control.set_compression_gain_db(*digital_gain);
+    }
+    return manager.recommended_analog_level();
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`. The RMS error from AGC is replaced by an override
+  // value if `speech_probability_override` and `speech_level_override` have
+  // a value.
+  void CallProcess(int num_calls,
+                   absl::optional<float> speech_probability_override,
+                   absl::optional<float> speech_level_override) {
+    for (int i = 0; i < num_calls; ++i) {
+      EXPECT_CALL(*mock_agc, Process(_)).WillOnce(Return());
+      manager.Process(audio_buffer, speech_probability_override,
+                      speech_level_override);
+      absl::optional<int> new_digital_gain =
+          manager.GetDigitalComressionGain();
+      if (new_digital_gain) {
+        mock_gain_control.set_compression_gain_db(*new_digital_gain);
+      }
+    }
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`.
+  void CallPreProc(int num_calls, float clipped_ratio) {
+    RTC_DCHECK_GE(clipped_ratio, 0.0f);
+    RTC_DCHECK_LE(clipped_ratio, 1.0f);
+    WriteAudioBufferSamples(/*samples_value=*/0.0f, clipped_ratio,
+                            audio_buffer);
+    for (int i = 0; i < num_calls; ++i) {
+      manager.AnalyzePreProcess(audio_buffer);
+    }
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`.
+  void CallPreProcForChangingAudio(int num_calls, float peak_ratio) {
+    RTC_DCHECK_GE(peak_ratio, 0.0f);
+    RTC_DCHECK_LE(peak_ratio, 1.0f);
+    const float samples_value = peak_ratio * 32767.0f;
+
+    // Make half of the calls on a frame where the samples alternate
+    // `sample_values` and zeros.
+    WriteAudioBufferSamples(samples_value, /*clipped_ratio=*/0.0f,
+                            audio_buffer);
+    for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+      for (size_t k = 1; k < audio_buffer.num_frames(); k += 2) {
+        audio_buffer.channels()[ch][k] = 0.0f;
+      }
+    }
+    for (int i = 0; i < num_calls / 2; ++i) {
+      manager.AnalyzePreProcess(audio_buffer);
+    }
+
+    // Make half of the calls on a frame where all the samples equal
+    // `sample_values`.
+    WriteAudioBufferSamples(samples_value, /*clipped_ratio=*/0.0f,
+                            audio_buffer);
+    for (int i = 0; i < num_calls - num_calls / 2; ++i) {
+      manager.AnalyzePreProcess(audio_buffer);
+    }
+  }
+
+  AudioBuffer audio_buffer;
+  MockAgc* mock_agc;
+  AgcManagerDirect manager;
+  MockGainControl mock_gain_control;
+};
+
+class AgcManagerDirectParametrizedTest
+    : public ::testing::TestWithParam<std::tuple<absl::optional<int>, bool>> {
+ protected:
+  AgcManagerDirectParametrizedTest()
+      : field_trials_(
+            GetAgcMinMicLevelExperimentFieldTrial(std::get<0>(GetParam()))) {}
+
+  bool IsMinMicLevelOverridden() const {
+    return std::get<0>(GetParam()).has_value();
+  }
+  int GetMinMicLevel() const {
+    return std::get<0>(GetParam()).value_or(kMinMicLevel);
+  }
+
+  bool IsRmsErrorOverridden() const { return std::get<1>(GetParam()); }
+  absl::optional<float> GetOverrideOrEmpty(float value) const {
+    return IsRmsErrorOverridden() ? absl::optional<float>(value)
+                                  : absl::nullopt;
+  }
+
+ private:
+  test::ScopedFieldTrials field_trials_;
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    ,
+    AgcManagerDirectParametrizedTest,
+    ::testing::Combine(testing::Values(absl::nullopt, 12, 20),
+                       testing::Bool()));
+
+// Checks that when the analog controller is disabled, no downward adaptation
+// takes place.
+// TODO(webrtc:7494): Revisit the test after moving the number of override wait
+// frames to APM config. The test passes but internally the gain update timing
+// differs.
+// Checks that when the analog controller is disabled, no downward adaptation
+// takes place.
+// TODO(webrtc:7494): Revisit the test after moving the number of override wait
+// frames to APM config. The test passes but internally the gain update timing
+// differs.
+TEST_P(AgcManagerDirectParametrizedTest,
+       DisabledAnalogAgcDoesNotAdaptDownwards) {
+  AgcManagerDirect manager_no_analog_agc(kNumChannels,
+                                         GetDisabledAnalogAgcConfig());
+  manager_no_analog_agc.Initialize();
+  AgcManagerDirect manager_with_analog_agc(kNumChannels,
+                                           GetAnalogAgcTestConfig());
+  manager_with_analog_agc.Initialize();
+
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  constexpr int kAnalogLevel = 250;
+  static_assert(kAnalogLevel > kInitialInputVolume,
+                "Increase `kAnalogLevel`.");
+  manager_no_analog_agc.set_stream_analog_level(kAnalogLevel);
+  manager_with_analog_agc.set_stream_analog_level(kAnalogLevel);
+
+  // Make a first call with input that doesn't clip in order to let the
+  // controller read the input volume. That is needed because clipping input
+  // causes the controller to stay in idle state for
+  // `AnalogAgcConfig::clipped_wait_frames` frames.
+  WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f,
+                          audio_buffer);
+  manager_no_analog_agc.AnalyzePreProcess(audio_buffer);
+  manager_with_analog_agc.AnalyzePreProcess(audio_buffer);
+  manager_no_analog_agc.Process(audio_buffer,
+                                GetOverrideOrEmpty(kHighSpeechProbability),
+                                GetOverrideOrEmpty(-18.0f));
+  manager_with_analog_agc.Process(audio_buffer,
+                                  GetOverrideOrEmpty(kHighSpeechProbability),
+                                  GetOverrideOrEmpty(-18.0f));
+
+  // Feed clipping input to trigger a downward adaptation of the analog level.
+  WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.2f,
+                          audio_buffer);
+  manager_no_analog_agc.AnalyzePreProcess(audio_buffer);
+  manager_with_analog_agc.AnalyzePreProcess(audio_buffer);
+  manager_no_analog_agc.Process(audio_buffer,
+                                GetOverrideOrEmpty(kHighSpeechProbability),
+                                GetOverrideOrEmpty(-10.0f));
+  manager_with_analog_agc.Process(audio_buffer,
+                                  GetOverrideOrEmpty(kHighSpeechProbability),
+                                  GetOverrideOrEmpty(-10.0f));
+
+  // Check that no adaptation occurs when the analog controller is disabled
+  // and make sure that the test triggers a downward adaptation otherwise.
+  EXPECT_EQ(manager_no_analog_agc.recommended_analog_level(), kAnalogLevel);
+  ASSERT_LT(manager_with_analog_agc.recommended_analog_level(), kAnalogLevel);
+}
+
+// Checks that when the analog controller is disabled, no upward adaptation
+// takes place.
+// TODO(webrtc:7494): Revisit the test after moving the number of override wait
+// frames to APM config. The test passes but internally the gain update timing
+// differs.
+TEST_P(AgcManagerDirectParametrizedTest, DisabledAnalogAgcDoesNotAdaptUpwards) {
+  AgcManagerDirect manager_no_analog_agc(kNumChannels,
+                                         GetDisabledAnalogAgcConfig());
+  manager_no_analog_agc.Initialize();
+  AgcManagerDirect manager_with_analog_agc(kNumChannels,
+                                           GetAnalogAgcTestConfig());
+  manager_with_analog_agc.Initialize();
+
+  constexpr int kAnalogLevel = kInitialInputVolume;
+  manager_no_analog_agc.set_stream_analog_level(kAnalogLevel);
+  manager_with_analog_agc.set_stream_analog_level(kAnalogLevel);
+
+  // Feed speech with low energy to trigger an upward adaptation of the analog
+  // level.
+ constexpr int kNumFrames = 125; + constexpr int kGainDb = -20; + SpeechSamplesReader reader; + reader.Feed(kNumFrames, kGainDb, GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-42.0f), manager_no_analog_agc); + reader.Feed(kNumFrames, kGainDb, GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-42.0f), manager_with_analog_agc); + + // Check that no adaptation occurs when the analog controller is disabled + // and make sure that the test triggers an upward adaptation otherwise. + EXPECT_EQ(manager_no_analog_agc.recommended_analog_level(), kAnalogLevel); + ASSERT_GT(manager_with_analog_agc.recommended_analog_level(), kAnalogLevel); +} + +TEST_P(AgcManagerDirectParametrizedTest, + StartupMinVolumeConfigurationIsRespected) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_EQ(kInitialInputVolume, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, MicVolumeResponseToRmsError) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Compressor default; no residual error. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-23.0f)); + + // Inside the compressor's window; no change of volume. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-28.0f)); + + // Above the compressor's window; volume should be increased. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + EXPECT_EQ(130, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(20), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-38.0f)); + EXPECT_EQ(168, helper.manager.recommended_analog_level()); + + // Inside the compressor's window; no change of volume. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-23.0f)); + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-18.0f)); + + // Below the compressor's window; volume should be decreased. 
+ EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(167, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(163, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-9), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-9.0f)); + EXPECT_EQ(129, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, MicVolumeIsLimited) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Maximum upwards change is limited. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(183, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(243, helper.manager.recommended_analog_level()); + + // Won't go higher than the maximum. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(254, helper.manager.recommended_analog_level()); + + // Maximum downwards change is limited. 
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(194, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(137, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(88, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(54, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(33, helper.manager.recommended_analog_level());
+
+  // Won't go lower than the minimum.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(std::max(18, GetMinMicLevel()),
+            helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(22.0f));
+  EXPECT_EQ(std::max(12, GetMinMicLevel()),
+            helper.manager.recommended_analog_level());
+}
+
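+// Note on the speech level overrides used throughout these tests: the values
+// appear to be chosen so that the mocked RMS error equals the distance from an
+// assumed -18 dBFS target, i.e. error = -18 - level. For instance, a -48 dBFS
+// override pairs with an RMS error of 30 dB and a +22 dBFS override with -40
+// dB. This is an observation about the test data, not upstream documentation.
+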
+TEST_P(AgcManagerDirectParametrizedTest, CompressorStepsTowardsTarget) {
+  constexpr absl::optional<float> kNoOverride = absl::nullopt;
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  // Compressor default; no call to set_compression_gain_db.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(5), Return(true)))
+      .WillRepeatedly(Return(false));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-23.0f));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  // The mock `GetRmsErrorDb()` returns false; mimic this by passing
+  // absl::nullopt as an override.
+  helper.CallProcess(/*num_calls=*/19, kNoOverride, kNoOverride);
+
+  // Moves slowly upwards.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(9), Return(true)))
+      .WillRepeatedly(Return(false));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-27.0f));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/19, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+
+  // Moves slowly downward, then reverses before reaching the original target.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(5), Return(true)))
+      .WillRepeatedly(Return(false));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-23.0f));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(9), Return(true)))
+      .WillRepeatedly(Return(false));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-27.0f));
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, CompressorErrorIsDeemphasized) {
+  constexpr absl::optional<float> kNoOverride = absl::nullopt;
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillRepeatedly(Return(false));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-28.0f));
+  // The mock `GetRmsErrorDb()` returns false; mimic this by passing
+  // absl::nullopt as an override.
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillRepeatedly(Return(false));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-18.0f));
+  helper.CallProcess(/*num_calls=*/18, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(7))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(6))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(_)).Times(0);
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+}
+
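+// Note on the call counts above and below: the digital compression gain is
+// committed at most once every ~20 processed frames (about 200 ms at 10 ms
+// per frame) and moves in 1 dB steps towards its target, which is why these
+// tests interleave batches of ~20 `CallProcess()` calls with single
+// `set_compression_gain_db()` expectations. This is inferred from the test
+// structure, not from upstream documentation.
+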
+TEST_P(AgcManagerDirectParametrizedTest, CompressorReachesMaximum) {
+  constexpr absl::optional<float> kNoOverride = absl::nullopt;
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(10), Return(true)))
+      .WillRepeatedly(Return(false));
+  helper.CallProcess(/*num_calls=*/4, speech_probability_override,
+                     GetOverrideOrEmpty(-28.0f));
+  // The mock `GetRmsErrorDb()` returns false; mimic this by passing
+  // absl::nullopt as an override.
+  helper.CallProcess(/*num_calls=*/15, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(10))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(11))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(12))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, CompressorReachesMinimum) {
+  constexpr absl::optional<float> kNoOverride = absl::nullopt;
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillOnce(DoAll(SetArgPointee<0>(0), Return(true)))
+      .WillRepeatedly(Return(false));
+  helper.CallProcess(/*num_calls=*/4, speech_probability_override,
+                     GetOverrideOrEmpty(-18.0f));
+  // The mock `GetRmsErrorDb()` returns false; mimic this by passing
+  // absl::nullopt as an override.
+  helper.CallProcess(/*num_calls=*/15, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(6))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(5))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(4))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(3))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride);
+  EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(2))
+      .WillOnce(Return(0));
+  helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, NoActionWhileMuted) {
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume,
+                         GetOverrideOrEmpty(kHighSpeechProbability),
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  helper.manager.HandleCaptureOutputUsedChange(false);
+  helper.manager.Process(helper.audio_buffer,
+                         GetOverrideOrEmpty(kHighSpeechProbability),
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  absl::optional<int> new_digital_gain =
+      helper.manager.GetDigitalComressionGain();
+  if (new_digital_gain) {
+    helper.mock_gain_control.set_compression_gain_db(*new_digital_gain);
+  }
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, UnmutingChecksVolumeWithoutRaising) {
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume,
+                         GetOverrideOrEmpty(kHighSpeechProbability),
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  helper.manager.HandleCaptureOutputUsedChange(false);
+
helper.manager.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 127; + helper.manager.set_stream_analog_level(kInputVolume); + EXPECT_CALL(*helper.mock_agc, Reset()); + + // SetMicVolume should not be called. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)).WillOnce(Return(false)); + helper.CallProcess(/*num_calls=*/1, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_EQ(127, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, UnmutingRaisesTooLowVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.manager.HandleCaptureOutputUsedChange(false); + helper.manager.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 11; + helper.manager.set_stream_analog_level(kInputVolume); + EXPECT_CALL(*helper.mock_agc, Reset()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)).WillOnce(Return(false)); + helper.CallProcess(/*num_calls=*/1, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_EQ(GetMinMicLevel(), helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + ManualLevelChangeResultsInNoSetMicCall) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Change outside of compressor's range, which would normally trigger a call + // to `SetMicVolume()`. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + + // When the analog volume changes, the gain controller is reset. + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + + // GetMicVolume returns a value outside of the quantization slack, indicating + // a manual volume change. + ASSERT_NE(helper.manager.recommended_analog_level(), 154); + helper.manager.set_stream_analog_level(154); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + EXPECT_EQ(154, helper.manager.recommended_analog_level()); + + // Do the same thing, except downwards now. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.manager.set_stream_analog_level(100); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(100, helper.manager.recommended_analog_level()); + + // And finally verify the AGC continues working without a manual change. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(99, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + RecoveryAfterManualLevelChangeFromMax) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Force the mic up to max volume. Takes a few steps due to the residual + // gain limitation. 
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-48.0f));
+  EXPECT_EQ(183, helper.manager.recommended_analog_level());
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-48.0f));
+  EXPECT_EQ(243, helper.manager.recommended_analog_level());
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-48.0f));
+  EXPECT_EQ(255, helper.manager.recommended_analog_level());
+
+  // Manual change does not result in SetMicVolume call.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true)));
+  helper.manager.set_stream_analog_level(50);
+  EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-17.0f));
+  EXPECT_EQ(50, helper.manager.recommended_analog_level());
+
+  // Continues working as usual afterwards.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(20), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-38.0f));
+
+  EXPECT_EQ(69, helper.manager.recommended_analog_level());
+}
+
+// Checks that, when the min mic level override is not specified, AGC ramps up
+// towards the minimum mic level after the mic level is manually set below the
+// minimum gain to enforce.
+TEST_P(AgcManagerDirectParametrizedTest,
+       RecoveryAfterManualLevelChangeBelowMinWithoutMinMicLevelOverride) {
+  if (IsMinMicLevelOverridden()) {
+    GTEST_SKIP() << "Skipped. Min mic level overridden.";
+  }
+
+  const auto speech_probability_override =
+      GetOverrideOrEmpty(kHighSpeechProbability);
+
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(kInitialInputVolume, speech_probability_override,
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  // Manual change below min, but strictly positive, otherwise AGC won't take
+  // any action.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true)));
+  helper.manager.set_stream_analog_level(1);
+  EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-17.0f));
+  EXPECT_EQ(1, helper.manager.recommended_analog_level());
+
+  // Continues working as usual afterwards.
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(11), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-29.0f));
+  EXPECT_EQ(2, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(30), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-48.0f));
+  EXPECT_EQ(11, helper.manager.recommended_analog_level());
+
+  EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_))
+      .WillOnce(DoAll(SetArgPointee<0>(20), Return(true)));
+  helper.CallProcess(/*num_calls=*/1, speech_probability_override,
+                     GetOverrideOrEmpty(-38.0f));
+  EXPECT_EQ(18, helper.manager.recommended_analog_level());
+}
+
+// Checks that, when the min mic level override is specified, AGC immediately
+// applies the minimum mic level after the mic level is manually set below the
+// minimum gain to enforce.
+TEST_P(AgcManagerDirectParametrizedTest, + RecoveryAfterManualLevelChangeBelowMin) { + if (!IsMinMicLevelOverridden()) { + GTEST_SKIP() << "Skipped. Min mic level not overridden."; + } + + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + // Manual change below min, but strictly positive, otherwise + // AGC won't take any action. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + helper.manager.set_stream_analog_level(1); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-17.0f)); + EXPECT_EQ(GetMinMicLevel(), helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, NoClippingHasNoImpact) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.CallPreProc(/*num_calls=*/100, /*clipped_ratio=*/0); + EXPECT_EQ(128, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingUnderThresholdHasNoImpact) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/0.099); + EXPECT_EQ(128, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingLowersVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/0.2); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, WaitingPeriodBetweenClippingChecks) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(0); + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(225, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingLoweringIsLimited) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/180, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(kClippedMin, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(0); + helper.CallPreProc(/*num_calls=*/1000, + 
/*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(kClippedMin, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + ClippingMaxIsRespectedWhenEqualToLevel) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/10, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + ClippingMaxIsRespectedWhenHigherThanLevel) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/200, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(185, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-58.0f)); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); + helper.CallProcess(/*num_calls=*/10, speech_probability_override, + GetOverrideOrEmpty(-58.0f)); + EXPECT_EQ(240, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + MaxCompressionIsIncreasedAfterClipping) { + constexpr absl::optional kNoOverride = absl::nullopt; + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/210, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, kAboveClippedThreshold); + EXPECT_EQ(195, helper.manager.recommended_analog_level()); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/5, speech_probability_override, + GetOverrideOrEmpty(-29.0f)); + // The mock `GetRmsErrorDb()` returns false; mimic this by passing + // absl::nullopt as an override. 
+ helper.CallProcess(/*num_calls=*/14, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(8)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(9)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(10)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(11)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(12)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(13)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); + + // Continue clipping until we hit the maximum surplus compression. + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(180, helper.manager.recommended_analog_level()); + + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(1, kAboveClippedThreshold); + EXPECT_EQ(kClippedMin, helper.manager.recommended_analog_level()); + + // Current level is now at the minimum, but the maximum allowed level still + // has more to decrease. + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + + helper.CallPreProc(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillRepeatedly(Return(false)); + helper.CallProcess(/*num_calls=*/4, speech_probability_override, + GetOverrideOrEmpty(-34.0f)); + helper.CallProcess(/*num_calls=*/15, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(14)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(15)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(16)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(17)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/20, kNoOverride, kNoOverride); + EXPECT_CALL(helper.mock_gain_control, set_compression_gain_db(18)) + .WillOnce(Return(0)); + helper.CallProcess(/*num_calls=*/1, kNoOverride, kNoOverride); +} + 
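+// Note on the clipping arithmetic in the tests above and below: every handled
+// clipping event lowers the recommended level by `clipped_level_step`, which
+// `GetAnalogAgcTestConfig()` sets to `kClippedLevelStep` (evidently 15, given
+// the observed sequences 255 -> 240 and 210 -> 195 -> 180), and the level is
+// clamped from below at `clipped_level_min`.
+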
+TEST_P(AgcManagerDirectParametrizedTest, UserCanRaiseVolumeAfterClipping) { + const auto speech_probability_override = + GetOverrideOrEmpty(kHighSpeechProbability); + + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/225, + speech_probability_override, + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(210, helper.manager.recommended_analog_level()); + + // High enough error to trigger a volume check. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(14), Return(true))); + // User changed the volume. + helper.manager.set_stream_analog_level(250); + EXPECT_CALL(*helper.mock_agc, Reset()).Times(AtLeast(1)); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-32.0f)); + EXPECT_EQ(250, helper.manager.recommended_analog_level()); + + // Move down... + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-10), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-8.0f)); + EXPECT_EQ(210, helper.manager.recommended_analog_level()); + // And back up to the new max established by the user. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(40), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-58.0f)); + EXPECT_EQ(250, helper.manager.recommended_analog_level()); + // Will not move above new maximum. + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + helper.CallProcess(/*num_calls=*/1, speech_probability_override, + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(250, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingDoesNotPullLowVolumeBackUp) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/80, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, Reset()).Times(0); + int initial_volume = helper.manager.recommended_analog_level(); + helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(initial_volume, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, TakesNoActionOnZeroMicVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(kInitialInputVolume, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_CALL(*helper.mock_agc, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + helper.manager.set_stream_analog_level(0); + helper.CallProcess(/*num_calls=*/10, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(-48.0f)); + EXPECT_EQ(0, helper.manager.recommended_analog_level()); +} + +TEST_P(AgcManagerDirectParametrizedTest, ClippingDetectionLowersVolume) { + AgcManagerDirectTestHelper helper; + helper.CallAgcSequence(/*applied_input_volume=*/255, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f); + EXPECT_EQ(255, helper.manager.recommended_analog_level()); + helper.CallPreProcForChangingAudio(/*num_calls=*/100, 
/*peak_ratio=*/1.0f);
+  EXPECT_EQ(240, helper.manager.recommended_analog_level());
+}
+
+TEST_P(AgcManagerDirectParametrizedTest,
+       DisabledClippingPredictorDoesNotLowerVolume) {
+  AgcManagerDirectTestHelper helper;
+  helper.CallAgcSequence(/*applied_input_volume=*/255,
+                         GetOverrideOrEmpty(kHighSpeechProbability),
+                         GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_FALSE(helper.manager.clipping_predictor_enabled());
+  EXPECT_EQ(255, helper.manager.recommended_analog_level());
+  helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f);
+  EXPECT_EQ(255, helper.manager.recommended_analog_level());
+  helper.CallPreProcForChangingAudio(/*num_calls=*/100, /*peak_ratio=*/0.99f);
+  EXPECT_EQ(255, helper.manager.recommended_analog_level());
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, DisableDigitalDisablesDigital) {
+  if (IsRmsErrorOverridden()) {
+    GTEST_SKIP() << "Skipped. RMS error override does not affect the test.";
+  }
+
+  auto agc = std::unique_ptr<Agc>(new ::testing::NiceMock<MockAgc>());
+  MockGainControl mock_gain_control;
+  EXPECT_CALL(mock_gain_control, set_mode(GainControl::kFixedDigital));
+  EXPECT_CALL(mock_gain_control, set_target_level_dbfs(0));
+  EXPECT_CALL(mock_gain_control, set_compression_gain_db(0));
+  EXPECT_CALL(mock_gain_control, enable_limiter(false));
+
+  AnalogAgcConfig config;
+  config.enable_digital_adaptive = false;
+  auto manager = std::make_unique<AgcManagerDirect>(kNumChannels, config);
+  manager->Initialize();
+  manager->SetupDigitalGainControl(mock_gain_control);
+}
+
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentDefault) {
+  std::unique_ptr<AgcManagerDirect> manager =
+      CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                             kClippedRatioThreshold, kClippedWaitFrames);
+  EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
+}
+
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentDisabled) {
+  for (const std::string& field_trial_suffix : {"", "_20220210"}) {
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrial("Disabled" + field_trial_suffix));
+    std::unique_ptr<AgcManagerDirect> manager =
+        CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                               kClippedRatioThreshold, kClippedWaitFrames);
+    EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
+  }
+}
+
+// Checks that a field-trial parameter outside of the valid range [0,255] is
+// ignored.
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentOutOfRangeAbove) {
+  test::ScopedFieldTrials field_trial(
+      GetAgcMinMicLevelExperimentFieldTrial("Enabled-256"));
+  std::unique_ptr<AgcManagerDirect> manager =
+      CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                             kClippedRatioThreshold, kClippedWaitFrames);
+  EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
+}
+
+// Checks that a field-trial parameter outside of the valid range [0,255] is
+// ignored.
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentOutOfRangeBelow) {
+  test::ScopedFieldTrials field_trial(
+      GetAgcMinMicLevelExperimentFieldTrial("Enabled--1"));
+  std::unique_ptr<AgcManagerDirect> manager =
+      CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                             kClippedRatioThreshold, kClippedWaitFrames);
+  EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(), kMinMicLevel);
+}
+
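+// Note: the `GetAgcMinMicLevelExperimentFieldTrial*()` helpers used above
+// presumably expand to field-trial strings of the form
+// "WebRTC-Audio-AgcMinMicLevelExperiment/Enabled-50/" (with optional suffixes
+// such as "_20220210"); the exact format is defined where the helpers are
+// declared earlier in this file.
+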
+// Verifies that a valid experiment changes the minimum microphone level. The
+// start volume is larger than the min level and should therefore not be
+// changed.
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentEnabled50) {
+  constexpr int kMinMicLevelOverride = 50;
+  for (const std::string& field_trial_suffix : {"", "_20220210"}) {
+    SCOPED_TRACE(field_trial_suffix);
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride,
+                                                     field_trial_suffix));
+    std::unique_ptr<AgcManagerDirect> manager =
+        CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                               kClippedRatioThreshold, kClippedWaitFrames);
+    EXPECT_EQ(manager->channel_agcs_[0]->min_mic_level(),
+              kMinMicLevelOverride);
+  }
+}
+
+// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is
+// specified with a valid value, the mic level never gets lowered beyond the
+// override value in the presence of clipping.
+TEST(AgcManagerDirectTest, AgcMinMicLevelExperimentCheckMinLevelWithClipping) {
+  constexpr int kMinMicLevelOverride = 250;
+
+  // Create and initialize two AGCs by specifying and leaving unspecified the
+  // relevant field trial.
+  const auto factory = []() {
+    std::unique_ptr<AgcManagerDirect> manager =
+        CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                               kClippedRatioThreshold, kClippedWaitFrames);
+    manager->Initialize();
+    manager->set_stream_analog_level(kInitialInputVolume);
+    return manager;
+  };
+  std::unique_ptr<AgcManagerDirect> manager = factory();
+  std::unique_ptr<AgcManagerDirect> manager_with_override;
+  {
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride));
+    manager_with_override = factory();
+  }
+
+  // Create a test input signal which contains 80% clipped samples.
+  AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz,
+                           1);
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          audio_buffer);
+
+  // Simulate 4 seconds of clipping; it is expected to trigger a downward
+  // adjustment of the analog gain.
+  CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer,
+                           /*speech_probability_override=*/absl::nullopt,
+                           /*speech_level_override=*/absl::nullopt, *manager);
+  CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer,
+                           /*speech_probability_override=*/absl::nullopt,
+                           /*speech_level_override=*/absl::nullopt,
+                           *manager_with_override);
+
+  // Make sure that an adaptation occurred.
+  ASSERT_GT(manager->recommended_analog_level(), 0);
+
+  // Check that the test signal triggers a larger downward adaptation for
+  // `manager`, which is allowed to reach a lower gain.
+  EXPECT_GT(manager_with_override->recommended_analog_level(),
+            manager->recommended_analog_level());
+  // Check that the gain selected by `manager_with_override` equals the minimum
+  // value overridden via field trial.
+  EXPECT_EQ(manager_with_override->recommended_analog_level(),
+            kMinMicLevelOverride);
+}
+
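+// Note on the timing above: each `CallPreProcessAndProcess()` call feeds one
+// 10 ms frame, so 400 calls correspond to the 4 seconds of clipping mentioned
+// in the comments.
+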
+// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is
+// specified with a valid value, the mic level never gets lowered beyond the
+// override value in the presence of clipping when RMS error override is used.
+// TODO(webrtc:7494): Revisit the test after moving the number of override wait
+// frames to APM config. The test passes but internally the gain update timing
+// differs.
+TEST(AgcManagerDirectTest,
+     AgcMinMicLevelExperimentCheckMinLevelWithClippingWithRmsErrorOverride) {
+  constexpr int kMinMicLevelOverride = 250;
+
+  // Create and initialize two AGCs by specifying and leaving unspecified the
+  // relevant field trial.
+  const auto factory = []() {
+    std::unique_ptr<AgcManagerDirect> manager =
+        CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                               kClippedRatioThreshold, kClippedWaitFrames);
+    manager->Initialize();
+    manager->set_stream_analog_level(kInitialInputVolume);
+    return manager;
+  };
+  std::unique_ptr<AgcManagerDirect> manager = factory();
+  std::unique_ptr<AgcManagerDirect> manager_with_override;
+  {
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride));
+    manager_with_override = factory();
+  }
+
+  // Create a test input signal which contains 80% clipped samples.
+  AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz,
+                           1);
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          audio_buffer);
+
+  // Simulate 4 seconds of clipping; it is expected to trigger a downward
+  // adjustment of the analog gain.
+  CallPreProcessAndProcess(
+      /*num_calls=*/400, audio_buffer,
+      /*speech_probability_override=*/0.7f,
+      /*speech_level_override=*/-18.0f, *manager);
+  CallPreProcessAndProcess(
+      /*num_calls=*/400, audio_buffer,
+      /*speech_probability_override=*/absl::optional<float>(0.7f),
+      /*speech_level_override=*/absl::optional<float>(-18.0f),
+      *manager_with_override);
+
+  // Make sure that an adaptation occurred.
+  ASSERT_GT(manager->recommended_analog_level(), 0);
+
+  // Check that the test signal triggers a larger downward adaptation for
+  // `manager`, which is allowed to reach a lower gain.
+  EXPECT_GT(manager_with_override->recommended_analog_level(),
+            manager->recommended_analog_level());
+  // Check that the gain selected by `manager_with_override` equals the minimum
+  // value overridden via field trial.
+  EXPECT_EQ(manager_with_override->recommended_analog_level(),
+            kMinMicLevelOverride);
+}
+
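+// Note on the two call styles above: `CallPreProcessAndProcess()` takes
+// `absl::optional<float>` overrides, so passing the raw literals `0.7f` and
+// `-18.0f` and passing explicitly constructed `absl::optional<float>` values
+// are equivalent; the literals are implicitly wrapped.
+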
+// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is
+// specified with a value lower than the `clipped_level_min`, the behavior of
+// the analog gain controller is the same as that obtained when the field trial
+// is not specified.
+TEST(AgcManagerDirectTest,
+     AgcMinMicLevelExperimentCompareMicLevelWithClipping) {
+  // Create and initialize two AGCs by specifying and leaving unspecified the
+  // relevant field trial.
+  const auto factory = []() {
+    // Use a large clipped level step to more quickly decrease the analog gain
+    // with clipping.
+    AnalogAgcConfig config = kDefaultAnalogConfig;
+    config.startup_min_volume = kInitialInputVolume;
+    config.enable_digital_adaptive = false;
+    config.clipped_level_step = 64;
+    config.clipped_ratio_threshold = kClippedRatioThreshold;
+    config.clipped_wait_frames = kClippedWaitFrames;
+    auto controller =
+        std::make_unique<AgcManagerDirect>(/*num_capture_channels=*/1, config);
+    controller->Initialize();
+    controller->set_stream_analog_level(kInitialInputVolume);
+    return controller;
+  };
+  std::unique_ptr<AgcManagerDirect> manager = factory();
+  std::unique_ptr<AgcManagerDirect> manager_with_override;
+  {
+    constexpr int kMinMicLevelOverride = 20;
+    static_assert(
+        kDefaultAnalogConfig.clipped_level_min >= kMinMicLevelOverride,
+        "Use a lower override value.");
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride));
+    manager_with_override = factory();
+  }
+
+  // Create a test input signal which contains 80% clipped samples.
+  AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz,
+                           1);
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          audio_buffer);
+
+  // Simulate 4 seconds of clipping; it is expected to trigger a downward
+  // adjustment of the analog gain.
+  CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer,
+                           /*speech_probability_override=*/absl::nullopt,
+                           /*speech_level_override=*/absl::nullopt, *manager);
+  CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer,
+                           /*speech_probability_override=*/absl::nullopt,
+                           /*speech_level_override=*/absl::nullopt,
+                           *manager_with_override);
+
+  // Make sure that an adaptation occurred.
+  ASSERT_GT(manager->recommended_analog_level(), 0);
+
+  // Check that the selected analog gain is the same for both controllers and
+  // that it equals the minimum level reached when clipping is handled. That is
+  // expected because the minimum microphone level override is less than the
+  // minimum level used when clipping is detected.
+  EXPECT_EQ(manager->recommended_analog_level(),
+            manager_with_override->recommended_analog_level());
+  EXPECT_EQ(manager_with_override->recommended_analog_level(),
+            kDefaultAnalogConfig.clipped_level_min);
+}
+
+// Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is
+// specified with a value lower than the `clipped_level_min`, the behavior of
+// the analog gain controller is the same as that obtained when the field trial
+// is not specified.
+// TODO(webrtc:7494): Revisit the test after moving the number of override wait
+// frames to APM config. The test passes but internally the gain update timing
+// differs.
+TEST(AgcManagerDirectTest,
+     AgcMinMicLevelExperimentCompareMicLevelWithClippingWithRmsErrorOverride) {
+  // Create and initialize two AGCs by specifying and leaving unspecified the
+  // relevant field trial.
+  const auto factory = []() {
+    // Use a large clipped level step to more quickly decrease the analog gain
+    // with clipping.
+    AnalogAgcConfig config = kDefaultAnalogConfig;
+    config.startup_min_volume = kInitialInputVolume;
+    config.enable_digital_adaptive = false;
+    config.clipped_level_step = 64;
+    config.clipped_ratio_threshold = kClippedRatioThreshold;
+    config.clipped_wait_frames = kClippedWaitFrames;
+    auto controller =
+        std::make_unique<AgcManagerDirect>(/*num_capture_channels=*/1, config);
+    controller->Initialize();
+    controller->set_stream_analog_level(kInitialInputVolume);
+    return controller;
+  };
+  std::unique_ptr<AgcManagerDirect> manager = factory();
+  std::unique_ptr<AgcManagerDirect> manager_with_override;
+  {
+    constexpr int kMinMicLevelOverride = 20;
+    static_assert(
+        kDefaultAnalogConfig.clipped_level_min >= kMinMicLevelOverride,
+        "Use a lower override value.");
+    test::ScopedFieldTrials field_trial(
+        GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride));
+    manager_with_override = factory();
+  }
+
+  // Create a test input signal which contains 80% clipped samples.
+  AudioBuffer audio_buffer(kSampleRateHz, 1, kSampleRateHz, 1, kSampleRateHz,
+                           1);
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          audio_buffer);
+
+  CallPreProcessAndProcess(
+      /*num_calls=*/400, audio_buffer,
+      /*speech_probability_override=*/absl::optional<float>(0.7f),
+      /*speech_level_override=*/absl::optional<float>(-18.0f), *manager);
+  CallPreProcessAndProcess(
+      /*num_calls=*/400, audio_buffer,
+      /*speech_probability_override=*/absl::optional<float>(0.7f),
+      /*speech_level_override=*/absl::optional<float>(-18.0f),
+      *manager_with_override);
+
+  // Make sure that an adaptation occurred.
+  ASSERT_GT(manager->recommended_analog_level(), 0);
+
+  // Check that the selected analog gain is the same for both controllers and
+  // that it equals the minimum level reached when clipping is handled. That is
+  // expected because the minimum microphone level override is less than the
+  // minimum level used when clipping is detected.
+  EXPECT_EQ(manager->recommended_analog_level(),
+            manager_with_override->recommended_analog_level());
+  EXPECT_EQ(manager_with_override->recommended_analog_level(),
+            kDefaultAnalogConfig.clipped_level_min);
+}
+
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_level_step`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_ratio_threshold`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_wait_frames`.
+// Verifies that configurable clipping parameters are initialized as intended.
+TEST_P(AgcManagerDirectParametrizedTest, ClippingParametersVerified) {
+  if (IsRmsErrorOverridden()) {
+    GTEST_SKIP() << "Skipped. RMS error override does not affect the test.";
+  }
+
+  std::unique_ptr<AgcManagerDirect> manager =
+      CreateAgcManagerDirect(kInitialInputVolume, kClippedLevelStep,
+                             kClippedRatioThreshold, kClippedWaitFrames);
+  manager->Initialize();
+  EXPECT_EQ(manager->clipped_level_step_, kClippedLevelStep);
+  EXPECT_EQ(manager->clipped_ratio_threshold_, kClippedRatioThreshold);
+  EXPECT_EQ(manager->clipped_wait_frames_, kClippedWaitFrames);
+  std::unique_ptr<AgcManagerDirect> manager_custom =
+      CreateAgcManagerDirect(kInitialInputVolume,
+                             /*clipped_level_step=*/10,
+                             /*clipped_ratio_threshold=*/0.2f,
+                             /*clipped_wait_frames=*/50);
+  manager_custom->Initialize();
+  EXPECT_EQ(manager_custom->clipped_level_step_, 10);
+  EXPECT_EQ(manager_custom->clipped_ratio_threshold_, 0.2f);
+  EXPECT_EQ(manager_custom->clipped_wait_frames_, 50);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest,
+       DisableClippingPredictorDisablesClippingPredictor) {
+  if (IsRmsErrorOverridden()) {
+    GTEST_SKIP() << "Skipped. RMS error override does not affect the test.";
+  }
+
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  ClippingPredictorConfig config;
+  config.enabled = false;
+
+  std::unique_ptr<AgcManagerDirect> manager = CreateAgcManagerDirect(
+      kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold,
+      kClippedWaitFrames, config);
+  manager->Initialize();
+  EXPECT_FALSE(manager->clipping_predictor_enabled());
+  EXPECT_FALSE(manager->use_clipping_predictor_step());
+}
+
+TEST_P(AgcManagerDirectParametrizedTest, ClippingPredictorDisabledByDefault) {
+  if (IsRmsErrorOverridden()) {
+    GTEST_SKIP() << "Skipped. RMS error override does not affect the test.";
+  }
+
+  constexpr ClippingPredictorConfig kDefaultConfig;
+  EXPECT_FALSE(kDefaultConfig.enabled);
+}
+
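+// Note: the TODOs referencing bugs.webrtc.org/12874 above and below suggest
+// that, once resolved, the step-by-step `ClippingPredictorConfig` setup could
+// presumably collapse into C++20 designated initializers, e.g.:
+//   ClippingPredictorConfig config{.enabled = true,
+//                                  .use_predicted_step = true};
+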
RMS error override does not affect the test."; + } + + // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed. + ClippingPredictorConfig config; + config.enabled = true; + config.use_predicted_step = true; + + std::unique_ptr manager = CreateAgcManagerDirect( + kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, config); + manager->Initialize(); + EXPECT_TRUE(manager->clipping_predictor_enabled()); + EXPECT_TRUE(manager->use_clipping_predictor_step()); +} + +TEST_P(AgcManagerDirectParametrizedTest, + DisableClippingPredictorDoesNotLowerVolume) { + AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz, + kNumChannels, kSampleRateHz, kNumChannels); + + AnalogAgcConfig config = GetAnalogAgcTestConfig(); + config.clipping_predictor.enabled = false; + AgcManagerDirect manager(config, new ::testing::NiceMock()); + manager.Initialize(); + manager.set_stream_analog_level(/*level=*/255); + EXPECT_FALSE(manager.clipping_predictor_enabled()); + EXPECT_FALSE(manager.use_clipping_predictor_step()); + EXPECT_EQ(manager.recommended_analog_level(), 255); + manager.Process(audio_buffer, GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.recommended_analog_level(), 255); + CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.recommended_analog_level(), 255); + CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); + EXPECT_EQ(manager.recommended_analog_level(), 255); +} + +TEST_P(AgcManagerDirectParametrizedTest, + UsedClippingPredictionsProduceLowerAnalogLevels) { + AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz, + kNumChannels, kSampleRateHz, kNumChannels); + + AnalogAgcConfig config_with_prediction = GetAnalogAgcTestConfig(); + config_with_prediction.clipping_predictor.enabled = true; + config_with_prediction.clipping_predictor.use_predicted_step = true; + AnalogAgcConfig config_without_prediction = GetAnalogAgcTestConfig(); + config_without_prediction.clipping_predictor.enabled = false; + AgcManagerDirect manager_with_prediction(config_with_prediction, + new ::testing::NiceMock()); + AgcManagerDirect manager_without_prediction( + config_without_prediction, new ::testing::NiceMock()); + + manager_with_prediction.Initialize(); + manager_without_prediction.Initialize(); + + constexpr int kInitialLevel = 255; + constexpr float kClippingPeakRatio = 1.0f; + constexpr float kCloseToClippingPeakRatio = 0.99f; + constexpr float kZeroPeakRatio = 0.0f; + manager_with_prediction.set_stream_analog_level(kInitialLevel); + manager_without_prediction.set_stream_analog_level(kInitialLevel); + manager_with_prediction.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + manager_without_prediction.Process(audio_buffer, + GetOverrideOrEmpty(kHighSpeechProbability), + GetOverrideOrEmpty(kSpeechLevelDbfs)); + EXPECT_TRUE(manager_with_prediction.clipping_predictor_enabled()); + EXPECT_FALSE(manager_without_prediction.clipping_predictor_enabled()); + EXPECT_TRUE(manager_with_prediction.use_clipping_predictor_step()); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), kInitialLevel); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect a change in the analog level when the prediction step is used. 
+ CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect no change during waiting. + CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect a change when the prediction step is used. + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect no change when clipping is not detected or predicted. + CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel); + + // Expect a change for clipping frames. + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + + // Expect no change during waiting. + CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio, + manager_with_prediction); + CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio, + manager_without_prediction); + EXPECT_EQ(manager_with_prediction.recommended_analog_level(), + kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(manager_without_prediction.recommended_analog_level(), + kInitialLevel - kClippedLevelStep); + + // Expect a change for clipping frames. 
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            kInitialLevel - 4 * kClippedLevelStep);
+  EXPECT_EQ(manager_without_prediction.recommended_analog_level(),
+            kInitialLevel - 2 * kClippedLevelStep);
+}
+
+TEST_P(AgcManagerDirectParametrizedTest,
+       UnusedClippingPredictionsProduceEqualAnalogLevels) {
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  AnalogAgcConfig config_with_prediction = GetAnalogAgcTestConfig();
+  config_with_prediction.clipping_predictor.enabled = true;
+  config_with_prediction.clipping_predictor.use_predicted_step = false;
+  AnalogAgcConfig config_without_prediction = GetAnalogAgcTestConfig();
+  config_without_prediction.clipping_predictor.enabled = false;
+  AgcManagerDirect manager_with_prediction(config_with_prediction,
+                                           new ::testing::NiceMock<MockAgc>());
+  AgcManagerDirect manager_without_prediction(
+      config_without_prediction, new ::testing::NiceMock<MockAgc>());
+
+  constexpr int kInitialLevel = 255;
+  constexpr float kClippingPeakRatio = 1.0f;
+  constexpr float kCloseToClippingPeakRatio = 0.99f;
+  constexpr float kZeroPeakRatio = 0.0f;
+  manager_with_prediction.Initialize();
+  manager_without_prediction.Initialize();
+  manager_with_prediction.set_stream_analog_level(kInitialLevel);
+  manager_without_prediction.set_stream_analog_level(kInitialLevel);
+  manager_with_prediction.Process(audio_buffer,
+                                  GetOverrideOrEmpty(kHighSpeechProbability),
+                                  GetOverrideOrEmpty(kSpeechLevelDbfs));
+  manager_without_prediction.Process(audio_buffer,
+                                     GetOverrideOrEmpty(kHighSpeechProbability),
+                                     GetOverrideOrEmpty(kSpeechLevelDbfs));
+
+  EXPECT_TRUE(manager_with_prediction.clipping_predictor_enabled());
+  EXPECT_FALSE(manager_without_prediction.clipping_predictor_enabled());
+  EXPECT_FALSE(manager_with_prediction.use_clipping_predictor_step());
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(), kInitialLevel);
+  EXPECT_EQ(manager_without_prediction.recommended_analog_level(),
+            kInitialLevel);
+
+  // Expect no change in the analog level for non-clipping frames.
+  CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect no change for non-clipping frames.
+  CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(kClippedWaitFrames, kCloseToClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect no change for non-clipping frames.
+  CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/10, kCloseToClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect no change when clipping is not detected or predicted.
+  CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(2 * kClippedWaitFrames, kZeroPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect a change for clipping frames.
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect no change during waiting.
+  CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(kClippedWaitFrames, kClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+
+  // Expect a change for clipping frames.
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_with_prediction);
+  CallPreProcessAudioBuffer(/*num_calls=*/1, kClippingPeakRatio,
+                            manager_without_prediction);
+  EXPECT_EQ(manager_with_prediction.recommended_analog_level(),
+            manager_without_prediction.recommended_analog_level());
+}
+
+// Checks that passing empty speech level and probability overrides to
+// `Process()` has the same effect as passing no overrides.
+TEST_P(AgcManagerDirectParametrizedTest, EmptyRmsErrorOverrideHasNoEffect) {
+  AgcManagerDirect manager_1(kNumChannels, GetAnalogAgcTestConfig());
+  AgcManagerDirect manager_2(kNumChannels, GetAnalogAgcTestConfig());
+  manager_1.Initialize();
+  manager_2.Initialize();
+
+  constexpr int kAnalogLevel = 50;
+  manager_1.set_stream_analog_level(kAnalogLevel);
+  manager_2.set_stream_analog_level(kAnalogLevel);
+
+  // Feed speech with low energy to trigger an upward adaptation of the analog
+  // level.
+  constexpr int kNumFrames = 125;
+  constexpr int kGainDb = -20;
+  SpeechSamplesReader reader;
+
+  // Check the initial input volume.
+  ASSERT_EQ(manager_1.recommended_analog_level(), kAnalogLevel);
+  ASSERT_EQ(manager_2.recommended_analog_level(), kAnalogLevel);
+
+  reader.Feed(kNumFrames, kGainDb, absl::nullopt, absl::nullopt, manager_1);
+  reader.Feed(kNumFrames, kGainDb, manager_2);
+
+  // Check that the states are the same and adaptation occurs.
+  EXPECT_EQ(manager_1.recommended_analog_level(),
+            manager_2.recommended_analog_level());
+  ASSERT_GT(manager_1.recommended_analog_level(), kAnalogLevel);
+  EXPECT_EQ(manager_1.voice_probability(), manager_2.voice_probability());
+  EXPECT_EQ(manager_1.frames_since_clipped_, manager_2.frames_since_clipped_);
+
+  // Check that the states of the channel AGCs are the same.
+  EXPECT_EQ(manager_1.num_channels(), manager_2.num_channels());
+  for (int i = 0; i < manager_1.num_channels(); ++i) {
+    EXPECT_EQ(manager_1.channel_agcs_[i]->recommended_analog_level(),
+              manager_2.channel_agcs_[i]->recommended_analog_level());
+    EXPECT_EQ(manager_1.channel_agcs_[i]->voice_probability(),
+              manager_2.channel_agcs_[i]->voice_probability());
+  }
+}
+
+// Checks that passing non-empty speech level and probability overrides to
+// `Process()` has an effect.
+TEST_P(AgcManagerDirectParametrizedTest, NonEmptyRmsErrorOverrideHasEffect) {
+  AgcManagerDirect manager_1(kNumChannels, GetAnalogAgcTestConfig());
+  AgcManagerDirect manager_2(kNumChannels, GetAnalogAgcTestConfig());
+  manager_1.Initialize();
+  manager_2.Initialize();
+
+  constexpr int kInputVolume = 128;
+  manager_1.set_stream_analog_level(kInputVolume);
+  manager_2.set_stream_analog_level(kInputVolume);
+
+  // Feed speech with low energy to trigger an upward adaptation of the input
+  // volume.
+  constexpr int kNumFrames = 125;
+  constexpr int kGainDb = -20;
+  SpeechSamplesReader reader;
+
+  // Make sure that feeding samples triggers an adaptation when no override
+  // is specified.
+  reader.Feed(kNumFrames, kGainDb, manager_1);
+  ASSERT_GT(manager_1.recommended_analog_level(), kInputVolume);
+
+  // Expect that feeding samples triggers an adaptation when the speech
+  // probability and speech level overrides are specified.
+  reader.Feed(kNumFrames, kGainDb,
+              /*speech_probability_override=*/kHighSpeechProbability,
+              /*speech_level_override=*/-45.0f, manager_2);
+  EXPECT_GT(manager_2.recommended_analog_level(), kInputVolume);
+
+  // The voice probability override does not affect the `voice_probability()`
+  // getter.
+  EXPECT_EQ(manager_1.voice_probability(), manager_2.voice_probability());
+}
+
+class AgcManagerDirectChannelSampleRateTest
+    : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+  int GetNumChannels() const { return std::get<0>(GetParam()); }
+  int GetSampleRateHz() const { return std::get<1>(GetParam()); }
+};
+
+TEST_P(AgcManagerDirectChannelSampleRateTest, CheckIsAlive) {
+  const int num_channels = GetNumChannels();
+  const int sample_rate_hz = GetSampleRateHz();
+
+  constexpr AnalogAgcConfig kConfig{.enabled = true,
+                                    .clipping_predictor{.enabled = true}};
+  AgcManagerDirect manager(num_channels, kConfig);
+  manager.Initialize();
+  AudioBuffer buffer(sample_rate_hz, num_channels, sample_rate_hz, num_channels,
+                     sample_rate_hz, num_channels);
+
+  constexpr int kStartupVolume = 100;
+  int applied_initial_volume = kStartupVolume;
+
+  // Trigger a downward adaptation with clipping.
+  WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.5f,
+                          buffer);
+  const int initial_volume1 = applied_initial_volume;
+  for (int i = 0; i < 400; ++i) {
+    manager.set_stream_analog_level(applied_initial_volume);
+    manager.AnalyzePreProcess(buffer);
+    manager.Process(buffer, kLowSpeechProbability,
+                    /*speech_level_dbfs=*/-20.0f);
+    applied_initial_volume = manager.recommended_analog_level();
+  }
+  ASSERT_LT(manager.recommended_analog_level(), initial_volume1);
+
+  // Fill in audio that does not clip.
+  WriteAudioBufferSamples(/*samples_value=*/1234.5f, /*clipped_ratio=*/0.0f,
+                          buffer);
+
+  // Trigger an upward adaptation.
+  const int initial_volume2 = manager.recommended_analog_level();
+  for (int i = 0; i < kConfig.clipped_wait_frames; ++i) {
+    manager.set_stream_analog_level(applied_initial_volume);
+    manager.AnalyzePreProcess(buffer);
+    manager.Process(buffer, kHighSpeechProbability,
+                    /*speech_level_dbfs=*/-65.0f);
+    applied_initial_volume = manager.recommended_analog_level();
+  }
+  EXPECT_GT(manager.recommended_analog_level(), initial_volume2);
+
+  // Trigger a downward adaptation.
+  const int initial_volume = manager.recommended_analog_level();
+  for (int i = 0; i < 100; ++i) {
+    manager.set_stream_analog_level(applied_initial_volume);
+    manager.AnalyzePreProcess(buffer);
+    manager.Process(buffer, kHighSpeechProbability,
+                    /*speech_level_dbfs=*/-5.0f);
+    applied_initial_volume = manager.recommended_analog_level();
+  }
+  EXPECT_LT(manager.recommended_analog_level(), initial_volume);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    ,
+    AgcManagerDirectChannelSampleRateTest,
+    ::testing::Combine(::testing::Values(1, 2, 3, 6),
+                       ::testing::Values(8000, 16000, 32000, 48000)));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/gain_control.h b/third_party/libwebrtc/modules/audio_processing/agc/gain_control.h
new file mode 100644
index 0000000000..389b2114af
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/gain_control.h
@@ -0,0 +1,105 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_
+#define MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_
+
+namespace webrtc {
+
+// The automatic gain control (AGC) component brings the signal to an
+// appropriate range. This is done by applying a digital gain directly and, in
+// the analog mode, prescribing an analog gain to be applied at the audio HAL.
+//
+// Recommended to be enabled on the client-side.
+class GainControl {
+ public:
+  // When an analog mode is set, this must be called prior to `ProcessStream()`
+  // to pass the current analog level from the audio HAL. Must be within the
+  // range provided to `set_analog_level_limits()`.
+  virtual int set_stream_analog_level(int level) = 0;
+
+  // When an analog mode is set, this should be called after `ProcessStream()`
+  // to obtain the recommended new analog level for the audio HAL. It is the
+  // user's responsibility to apply this level.
+  virtual int stream_analog_level() const = 0;
+
+  enum Mode {
+    // Adaptive mode intended for use if an analog volume control is available
+    // on the capture device. It will require the user to provide coupling
+    // between the OS mixer controls and AGC through the
+    // `stream_analog_level()` functions.
+    //
+    // It consists of an analog gain prescription for the audio device and a
+    // digital compression stage.
+    kAdaptiveAnalog,
+
+    // Adaptive mode intended for situations in which an analog volume control
+    // is unavailable. It operates in a similar fashion to the adaptive analog
+    // mode, but with scaling instead applied in the digital domain. As with
+    // the analog mode, it additionally uses a digital compression stage.
+    kAdaptiveDigital,
+
+    // Fixed mode which enables only the digital compression stage also used by
+    // the two adaptive modes.
+    //
+    // It is distinguished from the adaptive modes by considering only a
+    // short time-window of the input signal. It applies a fixed gain through
+    // most of the input level range, and compresses (gradually reduces gain
+    // with increasing level) the input signal at higher levels. This mode is
+    // preferred on embedded devices where the capture signal level is
+    // predictable, so that a known gain can be applied.
+ kFixedDigital + }; + + virtual int set_mode(Mode mode) = 0; + virtual Mode mode() const = 0; + + // Sets the target peak `level` (or envelope) of the AGC in dBFs (decibels + // from digital full-scale). The convention is to use positive values. For + // instance, passing in a value of 3 corresponds to -3 dBFs, or a target + // level 3 dB below full-scale. Limited to [0, 31]. + // + // TODO(ajm): use a negative value here instead, if/when VoE will similarly + // update its interface. + virtual int set_target_level_dbfs(int level) = 0; + virtual int target_level_dbfs() const = 0; + + // Sets the maximum `gain` the digital compression stage may apply, in dB. A + // higher number corresponds to greater compression, while a value of 0 will + // leave the signal uncompressed. Limited to [0, 90]. + virtual int set_compression_gain_db(int gain) = 0; + virtual int compression_gain_db() const = 0; + + // When enabled, the compression stage will hard limit the signal to the + // target level. Otherwise, the signal will be compressed but not limited + // above the target level. + virtual int enable_limiter(bool enable) = 0; + virtual bool is_limiter_enabled() const = 0; + + // Sets the `minimum` and `maximum` analog levels of the audio capture device. + // Must be set if and only if an analog mode is used. Limited to [0, 65535]. + virtual int set_analog_level_limits(int minimum, int maximum) = 0; + virtual int analog_level_minimum() const = 0; + virtual int analog_level_maximum() const = 0; + + // Returns true if the AGC has detected a saturation event (period where the + // signal reaches digital full-scale) in the current frame and the analog + // level cannot be reduced. + // + // This could be used as an indicator to reduce or disable analog mic gain at + // the audio HAL. + virtual bool stream_is_saturated() const = 0; + + protected: + virtual ~GainControl() {} +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build new file mode 100644 index 0000000000..c6ab9b3160 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/gain_control_interface_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] 
= True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("gain_control_interface_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc new file mode 100644 index 0000000000..e40a3f1629 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc @@ -0,0 +1,1238 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * + * Using a feedback system, determines an appropriate analog volume level + * given an input signal and current volume level. Targets a conservative + * signal level and is intended for use with a digital AGC to apply + * additional gain. 
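+ *
+ * Rough usage sketch (illustrative only; the functions are the ones defined
+ * in this file, but the variable names are hypothetical and the frame size
+ * assumes 16 kHz input, i.e. 160 samples per 10 ms frame):
+ *
+ *   void* agc = WebRtcAgc_Create();
+ *   WebRtcAgc_Init(agc, /*minLevel=*/0, /*maxLevel=*/255,
+ *                  kAgcModeAdaptiveAnalog, /*fs=*/16000);
+ *   // Per 10 ms capture frame: feed the mic signal, analyze it, apply the
+ *   // computed digital gains, and set the recommended analog level at the
+ *   // audio HAL.
+ *   int32_t gains[11];
+ *   WebRtcAgc_AddMic(agc, near_bands, num_bands, /*samples=*/160);
+ *   WebRtcAgc_Analyze(agc, near_bands, num_bands, /*samples=*/160,
+ *                     mic_level_in, &mic_level_out, /*echo=*/0,
+ *                     &saturation_warning, gains);
+ *   WebRtcAgc_Process(agc, gains, near_bands, num_bands, out_bands);
+ *   WebRtcAgc_Free(agc);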
+ *
+ */
+
+#include "modules/audio_processing/agc/legacy/analog_agc.h"
+
+#include <stdlib.h>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// Errors
+#define AGC_UNSPECIFIED_ERROR 18000
+#define AGC_UNINITIALIZED_ERROR 18002
+#define AGC_NULL_POINTER_ERROR 18003
+#define AGC_BAD_PARAMETER_ERROR 18004
+
+/* The slope of the curve segments in Q13. */
+static const int16_t kSlope1[8] = {21793, 12517, 7189, 4129,
+                                   2372,  1362,  472,  78};
+
+/* The offset in Q14 */
+static const int16_t kOffset1[8] = {25395, 23911, 22206, 20737,
+                                    19612, 18805, 17951, 17367};
+
+/* The slope of the curve segments in Q13. */
+static const int16_t kSlope2[8] = {2063, 1731, 1452, 1218, 1021, 857, 597, 337};
+
+/* The offset in Q14 */
+static const int16_t kOffset2[8] = {18432, 18379, 18290, 18177,
+                                    18052, 17920, 17670, 17286};
+
+static const int16_t kMuteGuardTimeMs = 8000;
+static const int16_t kInitCheck = 42;
+static const size_t kNumSubframes = 10;
+
+/* Default settings if config is not used */
+#define AGC_DEFAULT_TARGET_LEVEL 3
+#define AGC_DEFAULT_COMP_GAIN 9
+/* This is the target level for the analog part in ENV scale. To convert to
+ * RMS scale you have to add OFFSET_ENV_TO_RMS.
+ */
+#define ANALOG_TARGET_LEVEL 11
+#define ANALOG_TARGET_LEVEL_2 5  // ANALOG_TARGET_LEVEL / 2
+/* Offset between RMS scale (analog part) and ENV scale (digital part). This
+ * value actually varies with the FIXED_ANALOG_TARGET_LEVEL, hence we should
+ * in the future replace it with a table.
+ */
+#define OFFSET_ENV_TO_RMS 9
+/* The reference input level at which the digital part gives an output of
+ * targetLevelDbfs (desired level) if we have no compression gain. This level
+ * should be set high enough not to compress the peaks due to the dynamics.
+ */
+#define DIGITAL_REF_AT_0_COMP_GAIN 4
+/* Speed of reference level decrease.
+ */ +#define DIFF_REF_TO_ANALOG 5 + +/* Size of analog gain table */ +#define GAIN_TBL_LEN 32 +/* Matlab code: + * fprintf(1, '\t%i, %i, %i, %i,\n', round(10.^(linspace(0,10,32)/20) * 2^12)); + */ +/* Q12 */ +static const uint16_t kGainTableAnalog[GAIN_TBL_LEN] = { + 4096, 4251, 4412, 4579, 4752, 4932, 5118, 5312, 5513, 5722, 5938, + 6163, 6396, 6638, 6889, 7150, 7420, 7701, 7992, 8295, 8609, 8934, + 9273, 9623, 9987, 10365, 10758, 11165, 11587, 12025, 12480, 12953}; + +/* Gain/Suppression tables for virtual Mic (in Q10) */ +static const uint16_t kGainTableVirtualMic[128] = { + 1052, 1081, 1110, 1141, 1172, 1204, 1237, 1271, 1305, 1341, 1378, + 1416, 1454, 1494, 1535, 1577, 1620, 1664, 1710, 1757, 1805, 1854, + 1905, 1957, 2010, 2065, 2122, 2180, 2239, 2301, 2364, 2428, 2495, + 2563, 2633, 2705, 2779, 2855, 2933, 3013, 3096, 3180, 3267, 3357, + 3449, 3543, 3640, 3739, 3842, 3947, 4055, 4166, 4280, 4397, 4517, + 4640, 4767, 4898, 5032, 5169, 5311, 5456, 5605, 5758, 5916, 6078, + 6244, 6415, 6590, 6770, 6956, 7146, 7341, 7542, 7748, 7960, 8178, + 8402, 8631, 8867, 9110, 9359, 9615, 9878, 10148, 10426, 10711, 11004, + 11305, 11614, 11932, 12258, 12593, 12938, 13292, 13655, 14029, 14412, 14807, + 15212, 15628, 16055, 16494, 16945, 17409, 17885, 18374, 18877, 19393, 19923, + 20468, 21028, 21603, 22194, 22801, 23425, 24065, 24724, 25400, 26095, 26808, + 27541, 28295, 29069, 29864, 30681, 31520, 32382}; +static const uint16_t kSuppressionTableVirtualMic[128] = { + 1024, 1006, 988, 970, 952, 935, 918, 902, 886, 870, 854, 839, 824, 809, 794, + 780, 766, 752, 739, 726, 713, 700, 687, 675, 663, 651, 639, 628, 616, 605, + 594, 584, 573, 563, 553, 543, 533, 524, 514, 505, 496, 487, 478, 470, 461, + 453, 445, 437, 429, 421, 414, 406, 399, 392, 385, 378, 371, 364, 358, 351, + 345, 339, 333, 327, 321, 315, 309, 304, 298, 293, 288, 283, 278, 273, 268, + 263, 258, 254, 249, 244, 240, 236, 232, 227, 223, 219, 215, 211, 208, 204, + 200, 197, 193, 190, 186, 183, 180, 176, 173, 170, 167, 164, 161, 158, 155, + 153, 150, 147, 145, 142, 139, 137, 134, 132, 130, 127, 125, 123, 121, 118, + 116, 114, 112, 110, 108, 106, 104, 102}; + +/* Table for target energy levels. Values in Q(-7) + * Matlab code + * targetLevelTable = fprintf('%d,\t%d,\t%d,\t%d,\n', + * round((32767*10.^(-(0:63)'/20)).^2*16/2^7) */ + +static const int32_t kTargetLevelTable[64] = { + 134209536, 106606424, 84680493, 67264106, 53429779, 42440782, 33711911, + 26778323, 21270778, 16895980, 13420954, 10660642, 8468049, 6726411, + 5342978, 4244078, 3371191, 2677832, 2127078, 1689598, 1342095, + 1066064, 846805, 672641, 534298, 424408, 337119, 267783, + 212708, 168960, 134210, 106606, 84680, 67264, 53430, + 42441, 33712, 26778, 21271, 16896, 13421, 10661, + 8468, 6726, 5343, 4244, 3371, 2678, 2127, + 1690, 1342, 1066, 847, 673, 534, 424, + 337, 268, 213, 169, 134, 107, 85, + 67}; + +} // namespace + +int WebRtcAgc_AddMic(void* state, + int16_t* const* in_mic, + size_t num_bands, + size_t samples) { + int32_t nrg, max_nrg, sample, tmp32; + int32_t* ptr; + uint16_t targetGainIdx, gain; + size_t i; + int16_t n, L, tmp16, tmp_speech[16]; + LegacyAgc* stt; + stt = reinterpret_cast(state); + + if (stt->fs == 8000) { + L = 8; + if (samples != 80) { + return -1; + } + } else { + L = 16; + if (samples != 160) { + return -1; + } + } + + /* apply slowly varying digital gain */ + if (stt->micVol > stt->maxAnalog) { + /* `maxLevel` is strictly >= `micVol`, so this condition should be + * satisfied here, ensuring there is no divide-by-zero. 
*/ + RTC_DCHECK_GT(stt->maxLevel, stt->maxAnalog); + + /* Q1 */ + tmp16 = (int16_t)(stt->micVol - stt->maxAnalog); + tmp32 = (GAIN_TBL_LEN - 1) * tmp16; + tmp16 = (int16_t)(stt->maxLevel - stt->maxAnalog); + targetGainIdx = tmp32 / tmp16; + RTC_DCHECK_LT(targetGainIdx, GAIN_TBL_LEN); + + /* Increment through the table towards the target gain. + * If micVol drops below maxAnalog, we allow the gain + * to be dropped immediately. */ + if (stt->gainTableIdx < targetGainIdx) { + stt->gainTableIdx++; + } else if (stt->gainTableIdx > targetGainIdx) { + stt->gainTableIdx--; + } + + /* Q12 */ + gain = kGainTableAnalog[stt->gainTableIdx]; + + for (i = 0; i < samples; i++) { + size_t j; + for (j = 0; j < num_bands; ++j) { + sample = (in_mic[j][i] * gain) >> 12; + if (sample > 32767) { + in_mic[j][i] = 32767; + } else if (sample < -32768) { + in_mic[j][i] = -32768; + } else { + in_mic[j][i] = (int16_t)sample; + } + } + } + } else { + stt->gainTableIdx = 0; + } + + /* compute envelope */ + if (stt->inQueue > 0) { + ptr = stt->env[1]; + } else { + ptr = stt->env[0]; + } + + for (i = 0; i < kNumSubframes; i++) { + /* iterate over samples */ + max_nrg = 0; + for (n = 0; n < L; n++) { + nrg = in_mic[0][i * L + n] * in_mic[0][i * L + n]; + if (nrg > max_nrg) { + max_nrg = nrg; + } + } + ptr[i] = max_nrg; + } + + /* compute energy */ + if (stt->inQueue > 0) { + ptr = stt->Rxx16w32_array[1]; + } else { + ptr = stt->Rxx16w32_array[0]; + } + + for (i = 0; i < kNumSubframes / 2; i++) { + if (stt->fs == 16000) { + WebRtcSpl_DownsampleBy2(&in_mic[0][i * 32], 32, tmp_speech, + stt->filterState); + } else { + memcpy(tmp_speech, &in_mic[0][i * 16], 16 * sizeof(int16_t)); + } + /* Compute energy in blocks of 16 samples */ + ptr[i] = WebRtcSpl_DotProductWithScale(tmp_speech, tmp_speech, 16, 4); + } + + /* update queue information */ + if (stt->inQueue == 0) { + stt->inQueue = 1; + } else { + stt->inQueue = 2; + } + + /* call VAD (use low band only) */ + WebRtcAgc_ProcessVad(&stt->vadMic, in_mic[0], samples); + + return 0; +} + +int WebRtcAgc_AddFarend(void* state, const int16_t* in_far, size_t samples) { + LegacyAgc* stt = reinterpret_cast(state); + + int err = WebRtcAgc_GetAddFarendError(state, samples); + + if (err != 0) + return err; + + return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples); +} + +int WebRtcAgc_GetAddFarendError(void* state, size_t samples) { + LegacyAgc* stt; + stt = reinterpret_cast(state); + + if (stt == NULL) + return -1; + + if (stt->fs == 8000) { + if (samples != 80) + return -1; + } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) { + if (samples != 160) + return -1; + } else { + return -1; + } + + return 0; +} + +int WebRtcAgc_VirtualMic(void* agcInst, + int16_t* const* in_near, + size_t num_bands, + size_t samples, + int32_t micLevelIn, + int32_t* micLevelOut) { + int32_t tmpFlt, micLevelTmp, gainIdx; + uint16_t gain; + size_t ii, j; + LegacyAgc* stt; + + uint32_t nrg; + size_t sampleCntr; + uint32_t frameNrg = 0; + uint32_t frameNrgLimit = 5500; + int16_t numZeroCrossing = 0; + const int16_t kZeroCrossingLowLim = 15; + const int16_t kZeroCrossingHighLim = 20; + + stt = reinterpret_cast(agcInst); + + /* + * Before applying gain decide if this is a low-level signal. + * The idea is that digital AGC will not adapt to low-level + * signals. 
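+   * As an illustration of the branches below: a frame counts as low-level
+   * when its (capped) energy is under 500 or it has at most 5 zero
+   * crossings; with more than 15 zero crossings it is also flagged
+   * low-level if the energy stays at or below frameNrgLimit (5500,
+   * doubled for rates above 8 kHz) or the zero-crossing count reaches 20.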
+ */ + if (stt->fs != 8000) { + frameNrgLimit = frameNrgLimit << 1; + } + + frameNrg = (uint32_t)(in_near[0][0] * in_near[0][0]); + for (sampleCntr = 1; sampleCntr < samples; sampleCntr++) { + // increment frame energy if it is less than the limit + // the correct value of the energy is not important + if (frameNrg < frameNrgLimit) { + nrg = (uint32_t)(in_near[0][sampleCntr] * in_near[0][sampleCntr]); + frameNrg += nrg; + } + + // Count the zero crossings + numZeroCrossing += + ((in_near[0][sampleCntr] ^ in_near[0][sampleCntr - 1]) < 0); + } + + if ((frameNrg < 500) || (numZeroCrossing <= 5)) { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing <= kZeroCrossingLowLim) { + stt->lowLevelSignal = 0; + } else if (frameNrg <= frameNrgLimit) { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing >= kZeroCrossingHighLim) { + stt->lowLevelSignal = 1; + } else { + stt->lowLevelSignal = 0; + } + + micLevelTmp = micLevelIn << stt->scale; + /* Set desired level */ + gainIdx = stt->micVol; + if (stt->micVol > stt->maxAnalog) { + gainIdx = stt->maxAnalog; + } + if (micLevelTmp != stt->micRef) { + /* Something has happened with the physical level, restart. */ + stt->micRef = micLevelTmp; + stt->micVol = 127; + *micLevelOut = 127; + stt->micGainIdx = 127; + gainIdx = 127; + } + /* Pre-process the signal to emulate the microphone level. */ + /* Take one step at a time in the gain table. */ + if (gainIdx > 127) { + gain = kGainTableVirtualMic[gainIdx - 128]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + for (ii = 0; ii < samples; ii++) { + tmpFlt = (in_near[0][ii] * gain) >> 10; + if (tmpFlt > 32767) { + tmpFlt = 32767; + gainIdx--; + if (gainIdx >= 127) { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + if (tmpFlt < -32768) { + tmpFlt = -32768; + gainIdx--; + if (gainIdx >= 127) { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + in_near[0][ii] = (int16_t)tmpFlt; + for (j = 1; j < num_bands; ++j) { + tmpFlt = (in_near[j][ii] * gain) >> 10; + if (tmpFlt > 32767) { + tmpFlt = 32767; + } + if (tmpFlt < -32768) { + tmpFlt = -32768; + } + in_near[j][ii] = (int16_t)tmpFlt; + } + } + /* Set the level we (finally) used */ + stt->micGainIdx = gainIdx; + // *micLevelOut = stt->micGainIdx; + *micLevelOut = stt->micGainIdx >> stt->scale; + /* Add to Mic as if it was the output from a true microphone */ + if (WebRtcAgc_AddMic(agcInst, in_near, num_bands, samples) != 0) { + return -1; + } + return 0; +} + +void WebRtcAgc_UpdateAgcThresholds(LegacyAgc* stt) { + int16_t tmp16; + + /* Set analog target level in envelope dBOv scale */ + tmp16 = (DIFF_REF_TO_ANALOG * stt->compressionGaindB) + ANALOG_TARGET_LEVEL_2; + tmp16 = WebRtcSpl_DivW32W16ResW16((int32_t)tmp16, ANALOG_TARGET_LEVEL); + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN + tmp16; + if (stt->analogTarget < DIGITAL_REF_AT_0_COMP_GAIN) { + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN; + } + if (stt->agcMode == kAgcModeFixedDigital) { + /* Adjust for different parameter interpretation in FixedDigital mode */ + stt->analogTarget = stt->compressionGaindB; + } + /* Since the offset between RMS and ENV is not constant, we should make this + * into a + * table, but for now, we'll stick with a constant, tuned for the chosen + * analog + * target level. 
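+   * Worked example with the constants above: targetIdx =
+   * ANALOG_TARGET_LEVEL (11) + OFFSET_ENV_TO_RMS (9) = 20. Each step in
+   * kTargetLevelTable is 1 dB, so the start, primary and secondary limits
+   * set below sit roughly +/-1, +/-2 and +/-5 dB around the target energy.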
+ */ + stt->targetIdx = ANALOG_TARGET_LEVEL + OFFSET_ENV_TO_RMS; + /* Analog adaptation limits */ + /* analogTargetLevel = round((32767*10^(-targetIdx/20))^2*16/2^7) */ + stt->analogTargetLevel = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx]; /* ex. -20 dBov */ + stt->startUpperLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx - 1]; /* -19 dBov */ + stt->startLowerLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx + 1]; /* -21 dBov */ + stt->upperPrimaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx - 2]; /* -18 dBov */ + stt->lowerPrimaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx + 2]; /* -22 dBov */ + stt->upperSecondaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx - 5]; /* -15 dBov */ + stt->lowerSecondaryLimit = + kRxxBufferLen * kTargetLevelTable[stt->targetIdx + 5]; /* -25 dBov */ + stt->upperLimit = stt->startUpperLimit; + stt->lowerLimit = stt->startLowerLimit; +} + +void WebRtcAgc_SaturationCtrl(LegacyAgc* stt, + uint8_t* saturated, + int32_t* env) { + int16_t i, tmpW16; + + /* Check if the signal is saturated */ + for (i = 0; i < 10; i++) { + tmpW16 = (int16_t)(env[i] >> 20); + if (tmpW16 > 875) { + stt->envSum += tmpW16; + } + } + + if (stt->envSum > 25000) { + *saturated = 1; + stt->envSum = 0; + } + + /* stt->envSum *= 0.99; */ + stt->envSum = (int16_t)((stt->envSum * 32440) >> 15); +} + +void WebRtcAgc_ZeroCtrl(LegacyAgc* stt, int32_t* inMicLevel, int32_t* env) { + int16_t i; + int64_t tmp = 0; + int32_t midVal; + + /* Is the input signal zero? */ + for (i = 0; i < 10; i++) { + tmp += env[i]; + } + + /* Each block is allowed to have a few non-zero + * samples. + */ + if (tmp < 500) { + stt->msZero += 10; + } else { + stt->msZero = 0; + } + + if (stt->muteGuardMs > 0) { + stt->muteGuardMs -= 10; + } + + if (stt->msZero > 500) { + stt->msZero = 0; + + /* Increase microphone level only if it's less than 50% */ + midVal = (stt->maxAnalog + stt->minLevel + 1) / 2; + if (*inMicLevel < midVal) { + /* *inMicLevel *= 1.1; */ + *inMicLevel = (1126 * *inMicLevel) >> 10; + /* Reduces risk of a muted mic repeatedly triggering excessive levels due + * to zero signal detection. */ + *inMicLevel = WEBRTC_SPL_MIN(*inMicLevel, stt->zeroCtrlMax); + stt->micVol = *inMicLevel; + } + + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + /* The AGC has a tendency (due to problems with the VAD parameters), to + * vastly increase the volume after a muting event. This timer prevents + * upwards adaptation for a short period. */ + stt->muteGuardMs = kMuteGuardTimeMs; + } +} + +void WebRtcAgc_SpeakerInactiveCtrl(LegacyAgc* stt) { + /* Check if the near end speaker is inactive. + * If that is the case the VAD threshold is + * increased since the VAD speech model gets + * more sensitive to any sound after a long + * silence. 
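+   * Concretely (matching the body below): below a long-term standard
+   * deviation of 2500 the threshold is pinned at 1500; between 2500 and
+   * 4500 the target is raised linearly above kNormalVadThreshold, and the
+   * running threshold is smoothed toward it with a 31/32 one-pole average.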
+ */ + + int32_t tmp32; + int16_t vadThresh; + + if (stt->vadMic.stdLongTerm < 2500) { + stt->vadThreshold = 1500; + } else { + vadThresh = kNormalVadThreshold; + if (stt->vadMic.stdLongTerm < 4500) { + /* Scale between min and max threshold */ + vadThresh += (4500 - stt->vadMic.stdLongTerm) / 2; + } + + /* stt->vadThreshold = (31 * stt->vadThreshold + vadThresh) / 32; */ + tmp32 = vadThresh + 31 * stt->vadThreshold; + stt->vadThreshold = (int16_t)(tmp32 >> 5); + } +} + +void WebRtcAgc_ExpCurve(int16_t volume, int16_t* index) { + // volume in Q14 + // index in [0-7] + /* 8 different curves */ + if (volume > 5243) { + if (volume > 7864) { + if (volume > 12124) { + *index = 7; + } else { + *index = 6; + } + } else { + if (volume > 6554) { + *index = 5; + } else { + *index = 4; + } + } + } else { + if (volume > 2621) { + if (volume > 3932) { + *index = 3; + } else { + *index = 2; + } + } else { + if (volume > 1311) { + *index = 1; + } else { + *index = 0; + } + } + } +} + +int32_t WebRtcAgc_ProcessAnalog(void* state, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t vadLogRatio, + int16_t echo, + uint8_t* saturationWarning) { + uint32_t tmpU32; + int32_t Rxx16w32, tmp32; + int32_t inMicLevelTmp, lastMicVol; + int16_t i; + uint8_t saturated = 0; + LegacyAgc* stt; + + stt = reinterpret_cast(state); + inMicLevelTmp = inMicLevel << stt->scale; + + if (inMicLevelTmp > stt->maxAnalog) { + return -1; + } else if (inMicLevelTmp < stt->minLevel) { + return -1; + } + + if (stt->firstCall == 0) { + int32_t tmpVol; + stt->firstCall = 1; + tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9; + tmpVol = (stt->minLevel + tmp32); + + /* If the mic level is very low at start, increase it! */ + if ((inMicLevelTmp < tmpVol) && (stt->agcMode == kAgcModeAdaptiveAnalog)) { + inMicLevelTmp = tmpVol; + } + stt->micVol = inMicLevelTmp; + } + + /* Set the mic level to the previous output value if there is digital input + * gain */ + if ((inMicLevelTmp == stt->maxAnalog) && (stt->micVol > stt->maxAnalog)) { + inMicLevelTmp = stt->micVol; + } + + /* If the mic level was manually changed to a very low value raise it! */ + if ((inMicLevelTmp != stt->micVol) && (inMicLevelTmp < stt->minOutput)) { + tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9; + inMicLevelTmp = (stt->minLevel + tmp32); + stt->micVol = inMicLevelTmp; + } + + if (inMicLevelTmp != stt->micVol) { + if (inMicLevel == stt->lastInMicLevel) { + // We requested a volume adjustment, but it didn't occur. This is + // probably due to a coarse quantization of the volume slider. + // Restore the requested value to prevent getting stuck. + inMicLevelTmp = stt->micVol; + } else { + // As long as the value changed, update to match. + stt->micVol = inMicLevelTmp; + } + } + + if (inMicLevelTmp > stt->maxLevel) { + // Always allow the user to raise the volume above the maxLevel. + stt->maxLevel = inMicLevelTmp; + } + + // Store last value here, after we've taken care of manual updates etc. + stt->lastInMicLevel = inMicLevel; + lastMicVol = stt->micVol; + + /* Checks if the signal is saturated. Also a check if individual samples + * are larger than 12000 is done. If they are the counter for increasing + * the volume level is set to -100ms + */ + WebRtcAgc_SaturationCtrl(stt, &saturated, stt->env[0]); + + /* The AGC is always allowed to lower the level if the signal is saturated */ + if (saturated == 1) { + /* Lower the recording level + * Rxx160_LP is adjusted down because it is so slow it could + * cause the AGC to make wrong decisions. 
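+     * In the fixed-point code below, the *= 0.875 decay is written as
+     * (x / 8) * 7, and the *= 0.903 volume drop is a Q15 multiply by
+     * 29591 applied to the span above minLevel.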
*/ + /* stt->Rxx160_LPw32 *= 0.875; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 8) * 7; + + stt->zeroCtrlMax = stt->micVol; + + /* stt->micVol *= 0.903; */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(29591, (uint32_t)(tmp32)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 2) { + stt->micVol = lastMicVol - 2; + } + inMicLevelTmp = stt->micVol; + + if (stt->micVol < stt->minOutput) { + *saturationWarning = 1; + } + + /* Reset counter for decrease of volume level to avoid + * decreasing too much. The saturation control can still + * lower the level if needed. */ + stt->msTooHigh = -100; + + /* Enable the control mechanism to ensure that our measure, + * Rxx160_LP, is in the correct range. This must be done since + * the measure is very slow. */ + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + /* Reset to initial values */ + stt->msecSpeechInnerChange = kMsecSpeechInner; + stt->msecSpeechOuterChange = kMsecSpeechOuter; + stt->changeToSlowMode = 0; + + stt->muteGuardMs = 0; + + stt->upperLimit = stt->startUpperLimit; + stt->lowerLimit = stt->startLowerLimit; + } + + /* Check if the input speech is zero. If so the mic volume + * is increased. On some computers the input is zero up as high + * level as 17% */ + WebRtcAgc_ZeroCtrl(stt, &inMicLevelTmp, stt->env[0]); + + /* Check if the near end speaker is inactive. + * If that is the case the VAD threshold is + * increased since the VAD speech model gets + * more sensitive to any sound after a long + * silence. + */ + WebRtcAgc_SpeakerInactiveCtrl(stt); + + for (i = 0; i < 5; i++) { + /* Computed on blocks of 16 samples */ + + Rxx16w32 = stt->Rxx16w32_array[0][i]; + + /* Rxx160w32 in Q(-7) */ + tmp32 = (Rxx16w32 - stt->Rxx16_vectorw32[stt->Rxx16pos]) >> 3; + stt->Rxx160w32 = stt->Rxx160w32 + tmp32; + stt->Rxx16_vectorw32[stt->Rxx16pos] = Rxx16w32; + + /* Circular buffer */ + stt->Rxx16pos++; + if (stt->Rxx16pos == kRxxBufferLen) { + stt->Rxx16pos = 0; + } + + /* Rxx16_LPw32 in Q(-4) */ + tmp32 = (Rxx16w32 - stt->Rxx16_LPw32) >> kAlphaShortTerm; + stt->Rxx16_LPw32 = (stt->Rxx16_LPw32) + tmp32; + + if (vadLogRatio > stt->vadThreshold) { + /* Speech detected! */ + + /* Check if Rxx160_LP is in the correct range. If + * it is too high/low then we set it to the maximum of + * Rxx16_LPw32 during the first 200ms of speech. + */ + if (stt->activeSpeech < 250) { + stt->activeSpeech += 2; + + if (stt->Rxx16_LPw32 > stt->Rxx16_LPw32Max) { + stt->Rxx16_LPw32Max = stt->Rxx16_LPw32; + } + } else if (stt->activeSpeech == 250) { + stt->activeSpeech += 2; + tmp32 = stt->Rxx16_LPw32Max >> 3; + stt->Rxx160_LPw32 = tmp32 * kRxxBufferLen; + } + + tmp32 = (stt->Rxx160w32 - stt->Rxx160_LPw32) >> kAlphaLongTerm; + stt->Rxx160_LPw32 = stt->Rxx160_LPw32 + tmp32; + + if (stt->Rxx160_LPw32 > stt->upperSecondaryLimit) { + stt->msTooHigh += 2; + stt->msTooLow = 0; + stt->changeToSlowMode = 0; + + if (stt->msTooHigh > stt->msecSpeechOuterChange) { + stt->msTooHigh = 0; + + /* Lower the recording level */ + /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ + tmp32 = stt->Rxx160_LPw32 >> 6; + stt->Rxx160_LPw32 = tmp32 * 53; + + /* Reduce the max gain to avoid excessive oscillation + * (but never drop below the maximum analog level). 
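+           * Below, maxLevel moves 1/16 of the way toward the current
+           * micVol per trigger (never below maxAnalog), and the 0.95
+           * scaling of the mic volume is a Q15 multiply by 31130.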
+ */ + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.95 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(31130, (uint32_t)(tmp32)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + + /* Enable the control mechanism to ensure that our measure, + * Rxx160_LP, is in the correct range. + */ + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + } + } else if (stt->Rxx160_LPw32 > stt->upperLimit) { + stt->msTooHigh += 2; + stt->msTooLow = 0; + stt->changeToSlowMode = 0; + + if (stt->msTooHigh > stt->msecSpeechInnerChange) { + /* Lower the recording level */ + stt->msTooHigh = 0; + /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 53; + + /* Reduce the max gain to avoid excessive oscillation + * (but never drop below the maximum analog level). + */ + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.965 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + WEBRTC_SPL_UMUL(31621, (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + } + } else if (stt->Rxx160_LPw32 < stt->lowerSecondaryLimit) { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechOuterChange) { + /* Raise the recording level */ + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. + + stt->msTooLow = 0; + + /* Normalize the volume level */ + tmp32 = (inMicLevelTmp - stt->minLevel) << 14; + if (stt->maxInit != stt->minLevel) { + volNormFIX = tmp32 / (stt->maxInit - stt->minLevel); + } + + /* Find correct curve */ + WebRtcAgc_ExpCurve(volNormFIX, &index); + + /* Compute weighting factor for the volume increase, 32^(-2*X)/2+1.05 + */ + weightFIX = + kOffset1[index] - (int16_t)((kSlope1[index] * volNormFIX) >> 13); + + /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67; + + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 14) + stt->minLevel; + if (stt->micVol < lastMicVol + 2) { + stt->micVol = lastMicVol + 2; + } + + inMicLevelTmp = stt->micVol; + } + } else if (stt->Rxx160_LPw32 < stt->lowerLimit) { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechInnerChange) { + /* Raise the recording level */ + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. 
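+          // Note (illustrative): volNormFIX is the input volume normalized
+          // to [0, 1] in Q14 (16384 == 1.0); it is computed just below as
+          // (micVol - minLevel) / (maxInit - minLevel) and indexes the
+          // exponential weighting curves kSlope2/kOffset2.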
+ + stt->msTooLow = 0; + + /* Normalize the volume level */ + tmp32 = (inMicLevelTmp - stt->minLevel) << 14; + if (stt->maxInit != stt->minLevel) { + volNormFIX = tmp32 / (stt->maxInit - stt->minLevel); + } + + /* Find correct curve */ + WebRtcAgc_ExpCurve(volNormFIX, &index); + + /* Compute weighting factor for the volume increase, (3.^(-2.*X))/8+1 + */ + weightFIX = + kOffset2[index] - (int16_t)((kSlope2[index] * volNormFIX) >> 13); + + /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67; + + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 14) + stt->minLevel; + if (stt->micVol < lastMicVol + 1) { + stt->micVol = lastMicVol + 1; + } + + inMicLevelTmp = stt->micVol; + } + } else { + /* The signal is inside the desired range which is: + * lowerLimit < Rxx160_LP/640 < upperLimit + */ + if (stt->changeToSlowMode > 4000) { + stt->msecSpeechInnerChange = 1000; + stt->msecSpeechOuterChange = 500; + stt->upperLimit = stt->upperPrimaryLimit; + stt->lowerLimit = stt->lowerPrimaryLimit; + } else { + stt->changeToSlowMode += 2; // in milliseconds + } + stt->msTooLow = 0; + stt->msTooHigh = 0; + + stt->micVol = inMicLevelTmp; + } + } + } + + /* Ensure gain is not increased in presence of echo or after a mute event + * (but allow the zeroCtrl() increase on the frame of a mute detection). + */ + if (echo == 1 || + (stt->muteGuardMs > 0 && stt->muteGuardMs < kMuteGuardTimeMs)) { + if (stt->micVol > lastMicVol) { + stt->micVol = lastMicVol; + } + } + + /* limit the gain */ + if (stt->micVol > stt->maxLevel) { + stt->micVol = stt->maxLevel; + } else if (stt->micVol < stt->minOutput) { + stt->micVol = stt->minOutput; + } + + *outMicLevel = WEBRTC_SPL_MIN(stt->micVol, stt->maxAnalog) >> stt->scale; + + return 0; +} + +int WebRtcAgc_Analyze(void* agcInst, + const int16_t* const* in_near, + size_t num_bands, + size_t samples, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t echo, + uint8_t* saturationWarning, + int32_t gains[11]) { + LegacyAgc* stt = reinterpret_cast(agcInst); + + if (stt == NULL) { + return -1; + } + + if (stt->fs == 8000) { + if (samples != 80) { + return -1; + } + } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) { + if (samples != 160) { + return -1; + } + } else { + return -1; + } + + *saturationWarning = 0; + // TODO(minyue): PUT IN RANGE CHECKING FOR INPUT LEVELS + *outMicLevel = inMicLevel; + + int32_t error = + WebRtcAgc_ComputeDigitalGains(&stt->digitalAgc, in_near, num_bands, + stt->fs, stt->lowLevelSignal, gains); + if (error == -1) { + return -1; + } + + if (stt->agcMode < kAgcModeFixedDigital && + (stt->lowLevelSignal == 0 || stt->agcMode != kAgcModeAdaptiveDigital)) { + if (WebRtcAgc_ProcessAnalog(agcInst, inMicLevel, outMicLevel, + stt->vadMic.logRatio, echo, + saturationWarning) == -1) { + return -1; + } + } + + /* update queue */ + if (stt->inQueue > 1) { + memcpy(stt->env[0], stt->env[1], 10 * sizeof(int32_t)); + memcpy(stt->Rxx16w32_array[0], stt->Rxx16w32_array[1], 5 * sizeof(int32_t)); + } + + if (stt->inQueue > 0) { + stt->inQueue--; + } + + return 0; +} + +int WebRtcAgc_Process(const void* agcInst, + const int32_t gains[11], + const int16_t* const* in_near, + size_t num_bands, + int16_t* const* out) { + const LegacyAgc* stt = (const LegacyAgc*)agcInst; + return WebRtcAgc_ApplyDigitalGains(gains, num_bands, stt->fs, in_near, out); +} + +int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig agcConfig) { 
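+  /* Summary (non-normative): validates that limiterEnable is a strict
+   * boolean and that targetLevelDbfs lies in [0, 31] (positive values
+   * meaning -dBFS, per gain_control.h), then refreshes the analog
+   * adaptation thresholds and rebuilds the digital gain table. */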
+  LegacyAgc* stt;
+  stt = reinterpret_cast<LegacyAgc*>(agcInst);
+
+  if (stt == NULL) {
+    return -1;
+  }
+
+  if (stt->initFlag != kInitCheck) {
+    stt->lastError = AGC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  if (agcConfig.limiterEnable != kAgcFalse &&
+      agcConfig.limiterEnable != kAgcTrue) {
+    stt->lastError = AGC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  stt->limiterEnable = agcConfig.limiterEnable;
+  stt->compressionGaindB = agcConfig.compressionGaindB;
+  if ((agcConfig.targetLevelDbfs < 0) || (agcConfig.targetLevelDbfs > 31)) {
+    stt->lastError = AGC_BAD_PARAMETER_ERROR;
+    return -1;
+  }
+  stt->targetLevelDbfs = agcConfig.targetLevelDbfs;
+
+  if (stt->agcMode == kAgcModeFixedDigital) {
+    /* Adjust for different parameter interpretation in FixedDigital mode */
+    stt->compressionGaindB += agcConfig.targetLevelDbfs;
+  }
+
+  /* Update threshold levels for analog adaptation */
+  WebRtcAgc_UpdateAgcThresholds(stt);
+
+  /* Recalculate gain table */
+  if (WebRtcAgc_CalculateGainTable(
+          &(stt->digitalAgc.gainTable[0]), stt->compressionGaindB,
+          stt->targetLevelDbfs, stt->limiterEnable, stt->analogTarget) == -1) {
+    return -1;
+  }
+  /* Store the config in a WebRtcAgcConfig */
+  stt->usedConfig.compressionGaindB = agcConfig.compressionGaindB;
+  stt->usedConfig.limiterEnable = agcConfig.limiterEnable;
+  stt->usedConfig.targetLevelDbfs = agcConfig.targetLevelDbfs;
+
+  return 0;
+}
+
+int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config) {
+  LegacyAgc* stt;
+  stt = reinterpret_cast<LegacyAgc*>(agcInst);
+
+  if (stt == NULL) {
+    return -1;
+  }
+
+  if (config == NULL) {
+    stt->lastError = AGC_NULL_POINTER_ERROR;
+    return -1;
+  }
+
+  if (stt->initFlag != kInitCheck) {
+    stt->lastError = AGC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  config->limiterEnable = stt->usedConfig.limiterEnable;
+  config->targetLevelDbfs = stt->usedConfig.targetLevelDbfs;
+  config->compressionGaindB = stt->usedConfig.compressionGaindB;
+
+  return 0;
+}
+
+void* WebRtcAgc_Create() {
+  LegacyAgc* stt = static_cast<LegacyAgc*>(malloc(sizeof(LegacyAgc)));
+
+  stt->initFlag = 0;
+  stt->lastError = 0;
+
+  return stt;
+}
+
+void WebRtcAgc_Free(void* state) {
+  LegacyAgc* stt;
+
+  stt = reinterpret_cast<LegacyAgc*>(state);
+  free(stt);
+}
+
+/* minLevel - Minimum volume level
+ * maxLevel - Maximum volume level
+ */
+int WebRtcAgc_Init(void* agcInst,
+                   int32_t minLevel,
+                   int32_t maxLevel,
+                   int16_t agcMode,
+                   uint32_t fs) {
+  int32_t max_add, tmp32;
+  int16_t i;
+  int tmpNorm;
+  LegacyAgc* stt;
+
+  /* typecast state pointer */
+  stt = reinterpret_cast<LegacyAgc*>(agcInst);
+
+  if (WebRtcAgc_InitDigital(&stt->digitalAgc, agcMode) != 0) {
+    stt->lastError = AGC_UNINITIALIZED_ERROR;
+    return -1;
+  }
+
+  /* Analog AGC variables */
+  stt->envSum = 0;
+
+  /* mode = 0 - Only saturation protection
+   *        1 - Analog Automatic Gain Control [-targetLevelDbfs (default -3
+   *            dBOv)]
+   *        2 - Digital Automatic Gain Control [-targetLevelDbfs (default -3
+   *            dBOv)]
+   *        3 - Fixed Digital Gain [compressionGaindB (default 8 dB)]
+   */
+  if (agcMode < kAgcModeUnchanged || agcMode > kAgcModeFixedDigital) {
+    return -1;
+  }
+  stt->agcMode = agcMode;
+  stt->fs = fs;
+
+  /* initialize input VAD */
+  WebRtcAgc_InitVad(&stt->vadMic);
+
+  /* If the volume range is smaller than 0-256 then
+   * the levels are shifted up to Q8-domain */
+  tmpNorm = WebRtcSpl_NormU32((uint32_t)maxLevel);
+  stt->scale = tmpNorm - 23;
+  if (stt->scale < 0) {
+    stt->scale = 0;
+  }
+  // TODO(bjornv): Investigate if we really need to scale up a small range now
+  // when we have a guard against zero-increments.
For now, we do not support scale up (scale + // = 0). + stt->scale = 0; + maxLevel <<= stt->scale; + minLevel <<= stt->scale; + + /* Make minLevel and maxLevel static in AdaptiveDigital */ + if (stt->agcMode == kAgcModeAdaptiveDigital) { + minLevel = 0; + maxLevel = 255; + stt->scale = 0; + } + /* The maximum supplemental volume range is based on a vague idea + * of how much lower the gain will be than the real analog gain. */ + max_add = (maxLevel - minLevel) / 4; + + /* Minimum/maximum volume level that can be set */ + stt->minLevel = minLevel; + stt->maxAnalog = maxLevel; + stt->maxLevel = maxLevel + max_add; + stt->maxInit = stt->maxLevel; + + stt->zeroCtrlMax = stt->maxAnalog; + stt->lastInMicLevel = 0; + + /* Initialize micVol parameter */ + stt->micVol = stt->maxAnalog; + if (stt->agcMode == kAgcModeAdaptiveDigital) { + stt->micVol = 127; /* Mid-point of mic level */ + } + stt->micRef = stt->micVol; + stt->micGainIdx = 127; + + /* Minimum output volume is 4% higher than the available lowest volume level + */ + tmp32 = ((stt->maxLevel - stt->minLevel) * 10) >> 8; + stt->minOutput = (stt->minLevel + tmp32); + + stt->msTooLow = 0; + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->firstCall = 0; + stt->msZero = 0; + stt->muteGuardMs = 0; + stt->gainTableIdx = 0; + + stt->msecSpeechInnerChange = kMsecSpeechInner; + stt->msecSpeechOuterChange = kMsecSpeechOuter; + + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + stt->vadThreshold = kNormalVadThreshold; + stt->inActive = 0; + + for (i = 0; i < kRxxBufferLen; i++) { + stt->Rxx16_vectorw32[i] = (int32_t)1000; /* -54dBm0 */ + } + stt->Rxx160w32 = 125 * kRxxBufferLen; /* (stt->Rxx16_vectorw32[0]>>3) = 125 */ + + stt->Rxx16pos = 0; + stt->Rxx16_LPw32 = (int32_t)16284; /* Q(-4) */ + + for (i = 0; i < 5; i++) { + stt->Rxx16w32_array[0][i] = 0; + } + for (i = 0; i < 10; i++) { + stt->env[0][i] = 0; + stt->env[1][i] = 0; + } + stt->inQueue = 0; + + WebRtcSpl_MemSetW32(stt->filterState, 0, 8); + + stt->initFlag = kInitCheck; + // Default config settings. + stt->defaultConfig.limiterEnable = kAgcTrue; + stt->defaultConfig.targetLevelDbfs = AGC_DEFAULT_TARGET_LEVEL; + stt->defaultConfig.compressionGaindB = AGC_DEFAULT_COMP_GAIN; + + if (WebRtcAgc_set_config(stt, stt->defaultConfig) == -1) { + stt->lastError = AGC_UNSPECIFIED_ERROR; + return -1; + } + stt->Rxx160_LPw32 = stt->analogTargetLevel; // Initialize rms value + + stt->lowLevelSignal = 0; + + /* Only positive values are allowed that are not too large */ + if ((minLevel >= maxLevel) || (maxLevel & 0xFC000000)) { + return -1; + } else { + return 0; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h new file mode 100644 index 0000000000..22cd924a93 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
+#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
+
+#include "modules/audio_processing/agc/legacy/digital_agc.h"
+#include "modules/audio_processing/agc/legacy/gain_control.h"
+
+namespace webrtc {
+
+/* Analog Automatic Gain Control variables:
+ * Constant declarations (inner limits inside which no changes are done)
+ * In the beginning the range is narrower, and it widens as soon as the
+ * measure 'Rxx160_LP' is inside it. Currently the starting limits are
+ * -22.2+/-1dBm0 and the final limits -22.2+/-2.5dBm0. These levels make the
+ * speech signal go towards -25.4dBm0 (-31.4dBov). Tuned with
+ * wbfile-31.4dBov.pcm
+ * The limits are created by running the AGC with a file having the desired
+ * signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
+ * by out=10*log10(in/260537279.7); Set the target level to the average level
+ * of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
+ * Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
+ */
+constexpr int16_t kRxxBufferLen = 10;
+
+static const int16_t kMsecSpeechInner = 520;
+static const int16_t kMsecSpeechOuter = 340;
+
+static const int16_t kNormalVadThreshold = 400;
+
+static const int16_t kAlphaShortTerm = 6;  // 2^-6 ~= 0.0156
+static const int16_t kAlphaLongTerm = 10;  // 2^-10 ~= 0.000977
+
+typedef struct {
+  // Configurable parameters/variables
+  uint32_t fs;                // Sampling frequency
+  int16_t compressionGaindB;  // Fixed gain level in dB
+  int16_t targetLevelDbfs;    // Target level in -dBfs of envelope (default -3)
+  int16_t agcMode;            // Hard coded mode (adaptAna/adaptDig/fixedDig)
+  uint8_t limiterEnable;      // Enabling limiter (on/off (default off))
+  WebRtcAgcConfig defaultConfig;
+  WebRtcAgcConfig usedConfig;
+
+  // General variables
+  int16_t initFlag;
+  int16_t lastError;
+
+  // Target level parameters
+  // Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
+  int32_t analogTargetLevel;    // = kRxxBufferLen * 846805;  -22 dBfs
+  int32_t startUpperLimit;      // = kRxxBufferLen * 1066064; -21 dBfs
+  int32_t startLowerLimit;      // = kRxxBufferLen * 672641;  -23 dBfs
+  int32_t upperPrimaryLimit;    // = kRxxBufferLen * 1342095; -20 dBfs
+  int32_t lowerPrimaryLimit;    // = kRxxBufferLen * 534298;  -24 dBfs
+  int32_t upperSecondaryLimit;  // = kRxxBufferLen * 2677832; -17 dBfs
+  int32_t lowerSecondaryLimit;  // = kRxxBufferLen * 267783;  -27 dBfs
+  uint16_t targetIdx;           // Table index for corresponding target level
+  int16_t analogTarget;         // Digital reference level in ENV scale
+
+  // Analog AGC specific variables
+  int32_t filterState[8];  // For downsampling wb to nb
+  int32_t upperLimit;      // Upper limit for mic energy
+  int32_t lowerLimit;      // Lower limit for mic energy
+  int32_t Rxx160w32;       // Average energy for one frame
+  int32_t Rxx16_LPw32;     // Low pass filtered subframe energies
+  int32_t Rxx160_LPw32;    // Low pass filtered frame energies
+  int32_t Rxx16_LPw32Max;  // Keeps track of largest energy subframe
+  int32_t Rxx16_vectorw32[kRxxBufferLen];  // Array with subframe energies
+  int32_t Rxx16w32_array[2][5];  // Energy values of microphone signal
+  int32_t env[2][10];            // Envelope values of subframes
+
+  int16_t Rxx16pos;          // Current position in the Rxx16_vectorw32
+  int16_t envSum;            // Filtered scaled envelope in subframes
+  int16_t vadThreshold;      // Threshold for VAD decision
+  int16_t inActive;          // Inactive time in milliseconds
+  int16_t msTooLow;          // Milliseconds of speech at a too low level
+  int16_t msTooHigh;         // Milliseconds of speech at a too high level
+  int16_t changeToSlowMode;  // Change to slow mode after some time at target
+  int16_t firstCall;         // First call to the process-function
+  int16_t msZero;            // Milliseconds of zero input
+  int16_t msecSpeechOuterChange;  // Min ms of speech between volume changes
+  int16_t msecSpeechInnerChange;  // Min ms of speech between volume changes
+  int16_t activeSpeech;           // Milliseconds of active speech
+  int16_t muteGuardMs;            // Counter to prevent mute action
+  int16_t inQueue;                // 10 ms batch indicator
+
+  // Microphone level variables
+  int32_t micRef;         // Remember ref. mic level for virtual mic
+  uint16_t gainTableIdx;  // Current position in virtual gain table
+  int32_t micGainIdx;     // Gain index of mic level to increase slowly
+  int32_t micVol;         // Remember volume between frames
+  int32_t maxLevel;       // Max possible vol level, incl dig gain
+  int32_t maxAnalog;      // Maximum possible analog volume level
+  int32_t maxInit;        // Initial value of "max"
+  int32_t minLevel;       // Minimum possible volume level
+  int32_t minOutput;      // Minimum output volume level
+  int32_t zeroCtrlMax;    // Remember max gain => don't amp low input
+  int32_t lastInMicLevel;
+
+  int16_t scale;  // Scale factor for internal volume levels
+  // Structs for VAD and digital_agc
+  AgcVad vadMic;
+  DigitalAgc digitalAgc;
+
+  int16_t lowLevelSignal;
+} LegacyAgc;
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc
new file mode 100644
index 0000000000..4cd86acba8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc
@@ -0,0 +1,704 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/legacy/digital_agc.h"
+
+#include <string.h>
+
+#include "modules/audio_processing/agc/legacy/gain_control.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// To generate the gaintable, copy&paste the following lines to a Matlab window:
+// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1;
+// zeros = 0:31; lvl = 2.^(1-zeros);
+// A = -10*log10(lvl) * (CompRatio - 1) / CompRatio;
+// B = MaxGain - MinGain;
+// gains = round(2^16*10.^(0.05 * (MinGain + B * (
+// log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) /
+// log(1/(1+exp(Knee*B))))));
+// fprintf(1, '\t%i, %i, %i, %i,\n', gains);
+// % Matlab code for plotting the gain and input/output level characteristic
+// (copy/paste the following 3 lines):
+// in = 10*log10(lvl); out = 20*log10(gains/65536);
+// subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input
+// (dB)'); ylabel('Gain (dB)');
+// subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on;
+// xlabel('Input (dB)'); ylabel('Output (dB)');
+// zoom on;
+
+// Generator table for y=log2(1+e^x) in Q8.
+enum { kGenFuncTableSize = 128 }; +static const uint16_t kGenFuncTable[kGenFuncTableSize] = { + 256, 485, 786, 1126, 1484, 1849, 2217, 2586, 2955, 3324, 3693, + 4063, 4432, 4801, 5171, 5540, 5909, 6279, 6648, 7017, 7387, 7756, + 8125, 8495, 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, 11819, + 12188, 12557, 12927, 13296, 13665, 14035, 14404, 14773, 15143, 15512, 15881, + 16251, 16620, 16989, 17359, 17728, 18097, 18466, 18836, 19205, 19574, 19944, + 20313, 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, 23637, 24006, + 24376, 24745, 25114, 25484, 25853, 26222, 26592, 26961, 27330, 27700, 28069, + 28438, 28808, 29177, 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132, + 32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, 35456, 35825, 36194, + 36564, 36933, 37302, 37672, 38041, 38410, 38780, 39149, 39518, 39888, 40257, + 40626, 40996, 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, 44320, + 44689, 45058, 45428, 45797, 46166, 46536, 46905}; + +static const int16_t kAvgDecayTime = 250; // frames; < 3000 + +// the 32 most significant bits of A(19) * B(26) >> 13 +#define AGC_MUL32(A, B) (((B) >> 13) * (A) + (((0x00001FFF & (B)) * (A)) >> 13)) +// C + the 32 most significant bits of A * B +#define AGC_SCALEDIFF32(A, B, C) \ + ((C) + ((B) >> 16) * (A) + (((0x0000FFFF & (B)) * (A)) >> 16)) + +} // namespace + +int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16 + int16_t digCompGaindB, // Q0 + int16_t targetLevelDbfs, // Q0 + uint8_t limiterEnable, + int16_t analogTarget) { // Q0 + // This function generates the compressor gain table used in the fixed digital + // part. + uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox; + int32_t inLevel, limiterLvl; + int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32; + const uint16_t kLog10 = 54426; // log2(10) in Q14 + const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14 + const uint16_t kLogE_1 = 23637; // log2(e) in Q14 + uint16_t constMaxGain; + uint16_t tmpU16, intPart, fracPart; + const int16_t kCompRatio = 3; + int16_t limiterOffset = 0; // Limiter offset + int16_t limiterIdx, limiterLvlX; + int16_t constLinApprox, maxGain, diffGain; + int16_t i, tmp16, tmp16no1; + int zeros, zerosScale; + + // Constants + // kLogE_1 = 23637; // log2(e) in Q14 + // kLog10 = 54426; // log2(10) in Q14 + // kLog10_2 = 49321; // 10*log10(2) in Q14 + + // Calculate maximum digital gain and zero gain level + tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1); + tmp16no1 = analogTarget - targetLevelDbfs; + tmp16no1 += + WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs)); + tmp32no1 = maxGain * kCompRatio; + if ((digCompGaindB <= analogTarget) && (limiterEnable)) { + limiterOffset = 0; + } + + // Calculate the difference between maximum gain and gain at 0dB0v + tmp32no1 = digCompGaindB * (kCompRatio - 1); + diffGain = + WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + if (diffGain < 0 || diffGain >= kGenFuncTableSize) { + RTC_DCHECK(0); + return -1; + } + + // Calculate the limiter level and index: + // limiterLvlX = analogTarget - limiterOffset + // limiterLvl = targetLevelDbfs + limiterOffset/compRatio + limiterLvlX = analogTarget - limiterOffset; + limiterIdx = 2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX * (1 << 13), + kLog10_2 / 2); + tmp16no1 = + WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio); + limiterLvl = targetLevelDbfs + tmp16no1; + + // Calculate (through table 
lookup): + // constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8) + constMaxGain = kGenFuncTable[diffGain]; // in Q8 + + // Calculate a parameter used to approximate the fractional part of 2^x with a + // piecewise linear function in Q14: + // constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14); + constLinApprox = 22817; // in Q14 + + // Calculate a denominator used in the exponential part to convert from dB to + // linear scale: + // den = 20*constMaxGain (in Q8) + den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8 + + for (i = 0; i < 32; i++) { + // Calculate scaled input level (compressor): + // inLevel = + // fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio) + tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0 + tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14 + inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14 + + // Calculate diffGain-inLevel, to map using the genFuncTable + inLevel = (int32_t)diffGain * (1 << 14) - inLevel; // Q14 + + // Make calculations on abs(inLevel) and compensate for the sign afterwards. + absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14 + + // LUT with interpolation + intPart = (uint16_t)(absInLevel >> 14); + fracPart = + (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part + tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8 + tmpU32no1 = tmpU16 * fracPart; // Q22 + tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22 + logApprox = tmpU32no1 >> 8; // Q14 + // Compensate for negative exponent using the relation: + // log2(1 + 2^-x) = log2(1 + 2^x) - x + if (inLevel < 0) { + zeros = WebRtcSpl_NormU32(absInLevel); + zerosScale = 0; + if (zeros < 15) { + // Not enough space for multiplication + tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1) + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13) + if (zeros < 9) { + zerosScale = 9 - zeros; + tmpU32no1 >>= zerosScale; // Q(zeros+13) + } else { + tmpU32no2 >>= zeros - 9; // Q22 + } + } else { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28 + tmpU32no2 >>= 6; // Q22 + } + logApprox = 0; + if (tmpU32no2 < tmpU32no1) { + logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); // Q14 + } + } + numFIX = (maxGain * constMaxGain) * (1 << 6); // Q14 + numFIX -= (int32_t)logApprox * diffGain; // Q14 + + // Calculate ratio + // Shift `numFIX` as much as possible. + // Ensure we avoid wrap-around in `den` as well. + if (numFIX > (den >> 8) || -numFIX > (den >> 8)) { // `den` is Q8. + zeros = WebRtcSpl_NormW32(numFIX); + } else { + zeros = WebRtcSpl_NormW32(den) + 8; + } + numFIX *= 1 << zeros; // Q(14+zeros) + + // Shift den so we end up in Qy1 + tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 9); // Q(zeros - 1) + y32 = numFIX / tmp32no1; // in Q15 + // This is to do rounding in Q14. + y32 = y32 >= 0 ? (y32 + 1) >> 1 : -((-y32 + 1) >> 1); + + if (limiterEnable && (i < limiterIdx)) { + tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14 + tmp32 -= limiterLvl * (1 << 14); // Q14 + y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20); + } + if (y32 > 39000) { + tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27 + tmp32 >>= 13; // In Q14. + } else { + tmp32 = y32 * kLog10 + 8192; // in Q28 + tmp32 >>= 14; // In Q14. 
+ } + tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16) + + // Calculate power + if (tmp32 > 0) { + intPart = (int16_t)(tmp32 >> 14); + fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14 + if ((fracPart >> 13) != 0) { + tmp16 = (2 << 14) - constLinApprox; + tmp32no2 = (1 << 14) - fracPart; + tmp32no2 *= tmp16; + tmp32no2 >>= 13; + tmp32no2 = (1 << 14) - tmp32no2; + } else { + tmp16 = constLinApprox - (1 << 14); + tmp32no2 = (fracPart * tmp16) >> 13; + } + fracPart = (uint16_t)tmp32no2; + gainTable[i] = + (1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14); + } else { + gainTable[i] = 0; + } + } + + return 0; +} + +int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) { + if (agcMode == kAgcModeFixedDigital) { + // start at minimum to find correct gain faster + stt->capacitorSlow = 0; + } else { + // start out with 0 dB gain + stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f); + } + stt->capacitorFast = 0; + stt->gain = 65536; + stt->gatePrevious = 0; + stt->agcMode = agcMode; + + // initialize VADs + WebRtcAgc_InitVad(&stt->vadNearend); + WebRtcAgc_InitVad(&stt->vadFarend); + + return 0; +} + +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt, + const int16_t* in_far, + size_t nrSamples) { + RTC_DCHECK(stt); + // VAD for far end + WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples); + + return 0; +} + +// Gains is an 11 element long array (one value per ms, incl start & end). +int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* stt, + const int16_t* const* in_near, + size_t num_bands, + uint32_t FS, + int16_t lowlevelSignal, + int32_t gains[11]) { + int32_t tmp32; + int32_t env[10]; + int32_t max_nrg; + int32_t cur_level; + int32_t gain32; + int16_t logratio; + int16_t lower_thr, upper_thr; + int16_t zeros = 0, zeros_fast, frac = 0; + int16_t decay; + int16_t gate, gain_adj; + int16_t k; + size_t n, L; + + // determine number of samples per ms + if (FS == 8000) { + L = 8; + } else if (FS == 16000 || FS == 32000 || FS == 48000) { + L = 16; + } else { + return -1; + } + + // VAD for near end + logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, in_near[0], L * 10); + + // Account for far end VAD + if (stt->vadFarend.counter > 10) { + tmp32 = 3 * logratio; + logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2); + } + + // Determine decay factor depending on VAD + // upper_thr = 1.0f; + // lower_thr = 0.25f; + upper_thr = 1024; // Q10 + lower_thr = 0; // Q10 + if (logratio > upper_thr) { + // decay = -2^17 / DecayTime; -> -65 + decay = -65; + } else if (logratio < lower_thr) { + decay = 0; + } else { + // decay = (int16_t)(((lower_thr - logratio) + // * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10); + // SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65 + tmp32 = (lower_thr - logratio) * 65; + decay = (int16_t)(tmp32 >> 10); + } + + // adjust decay factor for long silence (detected as low standard deviation) + // This is only done in the adaptive modes + if (stt->agcMode != kAgcModeFixedDigital) { + if (stt->vadNearend.stdLongTerm < 4000) { + decay = 0; + } else if (stt->vadNearend.stdLongTerm < 8096) { + // decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> + // 12); + tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay; + decay = (int16_t)(tmp32 >> 12); + } + + if (lowlevelSignal != 0) { + decay = 0; + } + } + // Find max amplitude per sub frame + // iterate over sub frames + for (k = 0; k < 10; k++) { + // iterate over samples + max_nrg = 0; + for (n = 0; n < L; n++) { + int32_t nrg = 
in_near[0][k * L + n] * in_near[0][k * L + n]; + if (nrg > max_nrg) { + max_nrg = nrg; + } + } + env[k] = max_nrg; + } + + // Calculate gain per sub frame + gains[0] = stt->gain; + for (k = 0; k < 10; k++) { + // Fast envelope follower + // decay time = -131000 / -1000 = 131 (ms) + stt->capacitorFast = + AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast); + if (env[k] > stt->capacitorFast) { + stt->capacitorFast = env[k]; + } + // Slow envelope follower + if (env[k] > stt->capacitorSlow) { + // increase capacitorSlow + stt->capacitorSlow = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow), + stt->capacitorSlow); + } else { + // decrease capacitorSlow + stt->capacitorSlow = + AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow); + } + + // use maximum of both capacitors as current level + if (stt->capacitorFast > stt->capacitorSlow) { + cur_level = stt->capacitorFast; + } else { + cur_level = stt->capacitorSlow; + } + // Translate signal level into gain, using a piecewise linear approximation + // find number of leading zeros + zeros = WebRtcSpl_NormU32((uint32_t)cur_level); + if (cur_level == 0) { + zeros = 31; + } + tmp32 = ((uint32_t)cur_level << zeros) & 0x7FFFFFFF; + frac = (int16_t)(tmp32 >> 19); // Q12. + // Interpolate between gainTable[zeros] and gainTable[zeros-1]. + tmp32 = + ((stt->gainTable[zeros - 1] - stt->gainTable[zeros]) * (int64_t)frac) >> + 12; + gains[k + 1] = stt->gainTable[zeros] + tmp32; + } + + // Gate processing (lower gain during absence of speech) + zeros = (zeros << 9) - (frac >> 3); + // find number of leading zeros + zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast); + if (stt->capacitorFast == 0) { + zeros_fast = 31; + } + tmp32 = ((uint32_t)stt->capacitorFast << zeros_fast) & 0x7FFFFFFF; + zeros_fast <<= 9; + zeros_fast -= (int16_t)(tmp32 >> 22); + + gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm; + + if (gate < 0) { + stt->gatePrevious = 0; + } else { + tmp32 = stt->gatePrevious * 7; + gate = (int16_t)((gate + tmp32) >> 3); + stt->gatePrevious = gate; + } + // gate < 0 -> no gate + // gate > 2500 -> max gate + if (gate > 0) { + if (gate < 2500) { + gain_adj = (2500 - gate) >> 5; + } else { + gain_adj = 0; + } + for (k = 0; k < 10; k++) { + if ((gains[k + 1] - stt->gainTable[0]) > 8388608) { + // To prevent wraparound + tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8; + tmp32 *= 178 + gain_adj; + } else { + tmp32 = (gains[k + 1] - stt->gainTable[0]) * (178 + gain_adj); + tmp32 >>= 8; + } + gains[k + 1] = stt->gainTable[0] + tmp32; + } + } + + // Limit gain to avoid overload distortion + for (k = 0; k < 10; k++) { + // Find a shift of gains[k + 1] such that it can be squared without + // overflow, but at least by 10 bits. 
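+    // Worked example: gains[k + 1] = 2^27 (a gain of 2^11 in Q16) gives
+    // WebRtcSpl_NormW32(2^27) = 3, so zeros = 16 - 3 = 13 and
+    // gain32 = (2^27 >> 13) + 1 = 16385, whose square (~2^28) still fits
+    // in an int32_t.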
+ zeros = 10; + if (gains[k + 1] > 47452159) { + zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]); + } + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; + // check for overflow + while (AGC_MUL32((env[k] >> 12) + 1, gain32) > + WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10))) { + // multiply by 253/256 ==> -0.1 dB + if (gains[k + 1] > 8388607) { + // Prevent wrap around + gains[k + 1] = (gains[k + 1] / 256) * 253; + } else { + gains[k + 1] = (gains[k + 1] * 253) / 256; + } + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; + } + } + // gain reductions should be done 1 ms earlier than gain increases + for (k = 1; k < 10; k++) { + if (gains[k] > gains[k + 1]) { + gains[k] = gains[k + 1]; + } + } + // save start gain for next frame + stt->gain = gains[10]; + + return 0; +} + +int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11], + size_t num_bands, + uint32_t FS, + const int16_t* const* in_near, + int16_t* const* out) { + // Apply gain + // handle first sub frame separately + size_t L; + int16_t L2; // samples/subframe + + // determine number of samples per ms + if (FS == 8000) { + L = 8; + L2 = 3; + } else if (FS == 16000 || FS == 32000 || FS == 48000) { + L = 16; + L2 = 4; + } else { + return -1; + } + + for (size_t i = 0; i < num_bands; ++i) { + if (in_near[i] != out[i]) { + // Only needed if they don't already point to the same place. + memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0])); + } + } + + // iterate over samples + int32_t delta = (gains[1] - gains[0]) * (1 << (4 - L2)); + int32_t gain32 = gains[0] * (1 << 4); + for (size_t n = 0; n < L; n++) { + for (size_t i = 0; i < num_bands; ++i) { + int32_t out_tmp = (int64_t)out[i][n] * ((gain32 + 127) >> 7) >> 16; + if (out_tmp > 4095) { + out[i][n] = (int16_t)32767; + } else if (out_tmp < -4096) { + out[i][n] = (int16_t)-32768; + } else { + int32_t tmp32 = ((int64_t)out[i][n] * (gain32 >> 4)) >> 16; + out[i][n] = (int16_t)tmp32; + } + } + + gain32 += delta; + } + // iterate over subframes + for (int k = 1; k < 10; k++) { + delta = (gains[k + 1] - gains[k]) * (1 << (4 - L2)); + gain32 = gains[k] * (1 << 4); + // iterate over samples + for (size_t n = 0; n < L; n++) { + for (size_t i = 0; i < num_bands; ++i) { + int64_t tmp64 = ((int64_t)(out[i][k * L + n])) * (gain32 >> 4); + tmp64 = tmp64 >> 16; + if (tmp64 > 32767) { + out[i][k * L + n] = 32767; + } else if (tmp64 < -32768) { + out[i][k * L + n] = -32768; + } else { + out[i][k * L + n] = (int16_t)(tmp64); + } + } + gain32 += delta; + } + } + return 0; +} + +void WebRtcAgc_InitVad(AgcVad* state) { + int16_t k; + + state->HPstate = 0; // state of high pass filter + state->logRatio = 0; // log( P(active) / P(inactive) ) + // average input level (Q10) + state->meanLongTerm = 15 << 10; + + // variance of input level (Q8) + state->varianceLongTerm = 500 << 8; + + state->stdLongTerm = 0; // standard deviation of input level in dB + // short-term average input level (Q10) + state->meanShortTerm = 15 << 10; + + // short-term variance of input level (Q8) + state->varianceShortTerm = 500 << 8; + + state->stdShortTerm = + 0; // short-term standard deviation of input level in dB + state->counter = 3; // counts updates + for (k = 0; k < 8; k++) { + // downsampling filter + state->downState[k] = 0; + } +} + +int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples) { // (i) number of samples + uint32_t nrg; + int32_t out, tmp32, tmp32b; + uint16_t tmpU16; + int16_t k, subfr, tmp16; + int16_t 
buf1[8]; + int16_t buf2[4]; + int16_t HPstate; + int16_t zeros, dB; + int64_t tmp64; + + // process in 10 sub frames of 1 ms (to save on memory) + nrg = 0; + HPstate = state->HPstate; + for (subfr = 0; subfr < 10; subfr++) { + // downsample to 4 kHz + if (nrSamples == 160) { + for (k = 0; k < 8; k++) { + tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1]; + tmp32 >>= 1; + buf1[k] = (int16_t)tmp32; + } + in += 16; + + WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState); + } else { + WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState); + in += 8; + } + + // high pass filter and compute energy + for (k = 0; k < 4; k++) { + out = buf2[k] + HPstate; + tmp32 = 600 * out; + HPstate = (int16_t)((tmp32 >> 10) - buf2[k]); + + // Add 'out * out / 2**6' to 'nrg' in a non-overflowing + // way. Guaranteed to work as long as 'out * out / 2**6' fits in + // an int32_t. + nrg += out * (out / (1 << 6)); + nrg += out * (out % (1 << 6)) / (1 << 6); + } + } + state->HPstate = HPstate; + + // find number of leading zeros + if (!(0xFFFF0000 & nrg)) { + zeros = 16; + } else { + zeros = 0; + } + if (!(0xFF000000 & (nrg << zeros))) { + zeros += 8; + } + if (!(0xF0000000 & (nrg << zeros))) { + zeros += 4; + } + if (!(0xC0000000 & (nrg << zeros))) { + zeros += 2; + } + if (!(0x80000000 & (nrg << zeros))) { + zeros += 1; + } + + // energy level (range {-32..30}) (Q10) + dB = (15 - zeros) * (1 << 11); + + // Update statistics + + if (state->counter < kAvgDecayTime) { + // decay time = AvgDecTime * 10 ms + state->counter++; + } + + // update short-term estimate of mean energy level (Q10) + tmp32 = state->meanShortTerm * 15 + dB; + state->meanShortTerm = (int16_t)(tmp32 >> 4); + + // update short-term estimate of variance in energy level (Q8) + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceShortTerm * 15; + state->varianceShortTerm = tmp32 / 16; + + // update short-term estimate of standard deviation in energy level (Q10) + tmp32 = state->meanShortTerm * state->meanShortTerm; + tmp32 = (state->varianceShortTerm << 12) - tmp32; + state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); + + // update long-term estimate of mean energy level (Q10) + tmp32 = state->meanLongTerm * state->counter + dB; + state->meanLongTerm = + WebRtcSpl_DivW32W16ResW16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); + + // update long-term estimate of variance in energy level (Q8) + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceLongTerm * state->counter; + state->varianceLongTerm = + WebRtcSpl_DivW32W16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); + + // update long-term estimate of standard deviation in energy level (Q10) + tmp32 = state->meanLongTerm * state->meanLongTerm; + tmp32 = (state->varianceLongTerm << 12) - tmp32; + state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); + + // update voice activity measure (Q10) + tmp16 = 3 << 12; + // TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in + // ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16() + // was used, which did an intermediate cast to (int16_t), hence losing + // significant bits. This cause logRatio to max out positive, rather than + // negative. This is a bug, but has very little significance. 
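+  // In floating point, the update below is approximately (sketch):
+  //   logRatio = (3 * (dB - meanLongTerm) / stdLongTerm + 13 * logRatio) / 16
+  // i.e. a one-pole smoothing (weight 13/16) of the deviation from the
+  // long-term mean, normalized by the long-term standard deviation and
+  // clamped to +/-2.0 (+/-2048 in Q10).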
+ tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm); + tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm); + tmpU16 = (13 << 12); + tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16); + tmp64 = tmp32; + tmp64 += tmp32b >> 10; + tmp64 >>= 6; + + // limit + if (tmp64 > 2048) { + tmp64 = 2048; + } else if (tmp64 < -2048) { + tmp64 = -2048; + } + state->logRatio = (int16_t)tmp64; + + return state->logRatio; // Q10 +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h new file mode 100644 index 0000000000..223c74b9bd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { + +typedef struct { + int32_t downState[8]; + int16_t HPstate; + int16_t counter; + int16_t logRatio; // log( P(active) / P(inactive) ) (Q10) + int16_t meanLongTerm; // Q10 + int32_t varianceLongTerm; // Q8 + int16_t stdLongTerm; // Q10 + int16_t meanShortTerm; // Q10 + int32_t varianceShortTerm; // Q8 + int16_t stdShortTerm; // Q10 +} AgcVad; // total = 54 bytes + +typedef struct { + int32_t capacitorSlow; + int32_t capacitorFast; + int32_t gain; + int32_t gainTable[32]; + int16_t gatePrevious; + int16_t agcMode; + AgcVad vadNearend; + AgcVad vadFarend; +} DigitalAgc; + +int32_t WebRtcAgc_InitDigital(DigitalAgc* digitalAgcInst, int16_t agcMode); + +int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* digitalAgcInst, + const int16_t* const* inNear, + size_t num_bands, + uint32_t FS, + int16_t lowLevelSignal, + int32_t gains[11]); + +int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11], + size_t num_bands, + uint32_t FS, + const int16_t* const* in_near, + int16_t* const* out); + +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst, + const int16_t* inFar, + size_t nrSamples); + +void WebRtcAgc_InitVad(AgcVad* vadInst); + +int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples); // (i) number of samples + +int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16 + int16_t compressionGaindB, // Q0 (in dB) + int16_t targetLevelDbfs, // Q0 (in dB) + uint8_t limiterEnable, + int16_t analogTarget); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h b/third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h new file mode 100644 index 0000000000..6010a988fa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy/gain_control.h @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
+#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace webrtc {
+
+enum {
+  kAgcModeUnchanged,
+  kAgcModeAdaptiveAnalog,
+  kAgcModeAdaptiveDigital,
+  kAgcModeFixedDigital
+};
+
+enum { kAgcFalse = 0, kAgcTrue };
+
+typedef struct {
+  int16_t targetLevelDbfs;    // default 3 (-3 dBOv)
+  int16_t compressionGaindB;  // default 9 dB
+  uint8_t limiterEnable;      // default kAgcTrue (on)
+} WebRtcAgcConfig;
+
+/*
+ * This function analyses the number of samples passed to
+ * farend and produces any error code that could arise.
+ *
+ * Input:
+ *      - agcInst        : AGC instance.
+ *      - samples        : Number of samples in input vector.
+ *
+ * Return value:
+ *                       : 0 - Normal operation.
+ *                       : -1 - Error.
+ */
+int WebRtcAgc_GetAddFarendError(void* state, size_t samples);
+
+/*
+ * This function processes a 10 ms frame of far-end speech to determine
+ * if there is active speech. The length of the input speech vector must be
+ * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
+ * FS=48000).
+ *
+ * Input:
+ *      - agcInst        : AGC instance.
+ *      - inFar          : Far-end input speech vector
+ *      - samples        : Number of samples in input vector
+ *
+ * Return value:
+ *                       : 0 - Normal operation.
+ *                       : -1 - Error
+ */
+int WebRtcAgc_AddFarend(void* agcInst, const int16_t* inFar, size_t samples);
+
+/*
+ * This function processes a 10 ms frame of microphone speech to determine
+ * if there is active speech. The length of the input speech vector must be
+ * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
+ * FS=48000). For very low input levels, the input signal is increased in level
+ * by multiplying and overwriting the samples in inMic[].
+ *
+ * This function should be called before any further processing of the
+ * near-end microphone signal.
+ *
+ * Input:
+ *      - agcInst        : AGC instance.
+ *      - inMic          : Microphone input speech vector for each band
+ *      - num_bands      : Number of bands in input vector
+ *      - samples        : Number of samples in input vector
+ *
+ * Return value:
+ *                       : 0 - Normal operation.
+ *                       : -1 - Error
+ */
+int WebRtcAgc_AddMic(void* agcInst,
+                     int16_t* const* inMic,
+                     size_t num_bands,
+                     size_t samples);
+
+/*
+ * This function replaces the analog microphone with a virtual one.
+ * It is a digital gain applied to the input signal and is used in the
+ * agcAdaptiveDigital mode where no microphone level is adjustable. The length
+ * of the input speech vector must be given in samples (80 when FS=8000, and
+ * 160 when FS=16000, FS=32000 or FS=48000).
+ *
+ * Input:
+ *      - agcInst        : AGC instance.
+ *      - inMic          : Microphone input speech vector for each band
+ *      - num_bands      : Number of bands in input vector
+ *      - samples        : Number of samples in input vector
+ *      - micLevelIn     : Input level of microphone (static)
+ *
+ * Output:
+ *      - inMic          : Microphone output after processing (L band)
+ *      - inMic_H        : Microphone output after processing (H band)
+ *      - micLevelOut    : Adjusted microphone level after processing
+ *
+ * Return value:
+ *                       : 0 - Normal operation.
+ * : -1 - Error + */ +int WebRtcAgc_VirtualMic(void* agcInst, + int16_t* const* inMic, + size_t num_bands, + size_t samples, + int32_t micLevelIn, + int32_t* micLevelOut); + +/* + * This function analyses a 10 ms frame and produces the analog and digital + * gains required to normalize the signal. The gain adjustments are done only + * during active periods of speech. The length of the speech vectors must be + * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or + * FS=48000). The echo parameter can be used to ensure the AGC will not adjust + * upward in the presence of echo. + * + * This function should be called after processing the near-end microphone + * signal, in any case after any echo cancellation. + * + * Input: + * - agcInst : AGC instance + * - inNear : Near-end input speech vector for each band + * - num_bands : Number of bands in input/output vector + * - samples : Number of samples in input/output vector + * - inMicLevel : Current microphone volume level + * - echo : Set to 0 if the signal passed to add_mic is + * almost certainly free of echo; otherwise set + * to 1. If you have no information regarding echo + * set to 0. + * + * Output: + * - outMicLevel : Adjusted microphone volume level + * - saturationWarning : A returned value of 1 indicates a saturation event + * has occurred and the volume cannot be further + * reduced. Otherwise will be set to 0. + * - gains : Vector of gains to apply for digital normalization + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_Analyze(void* agcInst, + const int16_t* const* inNear, + size_t num_bands, + size_t samples, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t echo, + uint8_t* saturationWarning, + int32_t gains[11]); + +/* + * This function processes a 10 ms frame by applying precomputed digital gains. + * + * Input: + * - agcInst : AGC instance + * - gains : Vector of gains to apply for digital normalization + * - in_near : Near-end input speech vector for each band + * - num_bands : Number of bands in input/output vector + * + * Output: + * - out : Gain-adjusted near-end speech vector + * : May be the same vector as the input. + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_Process(const void* agcInst, + const int32_t gains[11], + const int16_t* const* in_near, + size_t num_bands, + int16_t* const* out); + +/* + * This function sets the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * - config : config struct + * + * Output: + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig config); + +/* + * This function returns the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * + * Output: + * - config : config struct + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config); + +/* + * This function creates and returns an AGC instance, which will contain the + * state information for one (duplex) channel. + */ +void* WebRtcAgc_Create(void); + +/* + * This function frees the AGC instance created at the beginning. + * + * Input: + * - agcInst : AGC instance. + */ +void WebRtcAgc_Free(void* agcInst); + +/* + * This function initializes an AGC instance. + * + * Input: + * - agcInst : AGC instance. 
+ * - minLevel : Minimum possible mic level + * - maxLevel : Maximum possible mic level + * - agcMode : 0 - Unchanged + * : 1 - Adaptive Analog Automatic Gain Control -3dBOv + * : 2 - Adaptive Digital Automatic Gain Control -3dBOv + * : 3 - Fixed Digital Gain 0dB + * - fs : Sampling frequency + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcAgc_Init(void* agcInst, + int32_t minLevel, + int32_t maxLevel, + int16_t agcMode, + uint32_t fs); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build new file mode 100644 index 0000000000..0188a8ac10 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/legacy_agc_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc/legacy/analog_agc.cc", + "/third_party/libwebrtc/modules/audio_processing/agc/legacy/digital_agc.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] 
= "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("legacy_agc_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build new file mode 100644 index 0000000000..9db9a639e7 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_processing/agc/level_estimation_gn/moz.build @@ -0,0 +1,234 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc/agc.cc", + "/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc", + "/third_party/libwebrtc/modules/audio_processing/agc/utility.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = 
"WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("level_estimation_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc new file mode 100644 index 0000000000..b0a1f53b97 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.cc @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc/loudness_histogram.h"
+
+#include <string.h>
+
+#include <cmath>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+static const double kHistBinCenters[] = {
+    7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
+    1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
+    2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
+    3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
+    5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
+    1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
+    1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
+    2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
+    4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
+    7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
+    1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
+    2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
+    3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
+    6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
+    1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
+    1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
+    2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
+    4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
+    8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
+    1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
+    2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
+    3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
+    6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
+    1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
+    1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
+    3.00339145144454e+04, 3.56647189489147e+04};
+
+static const double kProbQDomain = 1024.0;
+// Loudness of -15 dB (smallest expected loudness) in log domain,
+// loudness_db = 13.5 * log10(rms);
+static const double kLogDomainMinBinCenter = -2.57752062648587;
+// Loudness step of 1 dB in log domain
+static const double kLogDomainStepSizeInverse = 5.81954605750359;
+
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static const int kLowProbThresholdQ10 =
+    static_cast<int>(kLowProbabilityThreshold * kProbQDomain);
+
+LoudnessHistogram::LoudnessHistogram()
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(),
+      hist_bin_index_(),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(0),
+      len_high_activity_(0) {
+  static_assert(
+      kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]),
+      "histogram bin centers incorrect size");
+}
+
+LoudnessHistogram::LoudnessHistogram(int window_size)
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(new int[window_size]),
+      hist_bin_index_(new int[window_size]),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(window_size),
+      len_high_activity_(0) {}
+
+LoudnessHistogram::~LoudnessHistogram() {}
+
+void LoudnessHistogram::Update(double rms, double activity_probability) {
+  // If circular histogram is activated then remove the oldest entry.
+  if (len_circular_buffer_ > 0)
+    RemoveOldestEntryAndUpdate();
+
+  // Find the corresponding bin.
+  int hist_index = GetBinIndex(rms);
+  // To Q10 domain.
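+  // E.g. an activity probability of 0.5 maps to floor(0.5 * 1024) = 512.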
+  int prob_q10 =
+      static_cast<int>(floor(activity_probability * kProbQDomain));
+  InsertNewestEntryAndUpdate(prob_q10, hist_index);
+}
+
+// Does nothing if the buffer is not full yet.
+void LoudnessHistogram::RemoveOldestEntryAndUpdate() {
+  RTC_DCHECK_GT(len_circular_buffer_, 0);
+  // Do nothing if circular buffer is not full.
+  if (!buffer_is_full_)
+    return;
+
+  int oldest_prob = activity_probability_[buffer_index_];
+  int oldest_hist_index = hist_bin_index_[buffer_index_];
+  UpdateHist(-oldest_prob, oldest_hist_index);
+}
+
+void LoudnessHistogram::RemoveTransient() {
+  // Don't expect to be here if high-activity region is longer than
+  // `kTransientWidthThreshold` or there has not been any transient.
+  RTC_DCHECK_LE(len_high_activity_, kTransientWidthThreshold);
+  int index =
+      (buffer_index_ > 0) ? (buffer_index_ - 1) : len_circular_buffer_ - 1;
+  while (len_high_activity_ > 0) {
+    UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
+    activity_probability_[index] = 0;
+    index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
+    len_high_activity_--;
+  }
+}
+
+void LoudnessHistogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
+                                                   int hist_index) {
+  // Update the circular buffer if it is enabled.
+  if (len_circular_buffer_ > 0) {
+    // Removing transient.
+    if (activity_prob_q10 <= kLowProbThresholdQ10) {
+      // Lower than threshold probability, set it to zero.
+      activity_prob_q10 = 0;
+      // Check if this has been a transient.
+      if (len_high_activity_ <= kTransientWidthThreshold)
+        RemoveTransient();  // Remove this transient.
+      len_high_activity_ = 0;
+    } else if (len_high_activity_ <= kTransientWidthThreshold) {
+      len_high_activity_++;
+    }
+    // Updating the circular buffer.
+    activity_probability_[buffer_index_] = activity_prob_q10;
+    hist_bin_index_[buffer_index_] = hist_index;
+    // Increment the buffer index and check for wrap-around.
+    buffer_index_++;
+    if (buffer_index_ >= len_circular_buffer_) {
+      buffer_index_ = 0;
+      buffer_is_full_ = true;
+    }
+  }
+
+  num_updates_++;
+  if (num_updates_ < 0)
+    num_updates_--;
+
+  UpdateHist(activity_prob_q10, hist_index);
+}
+
+void LoudnessHistogram::UpdateHist(int activity_prob_q10, int hist_index) {
+  bin_count_q10_[hist_index] += activity_prob_q10;
+  audio_content_q10_ += activity_prob_q10;
+}
+
+double LoudnessHistogram::AudioContent() const {
+  return audio_content_q10_ / kProbQDomain;
+}
+
+LoudnessHistogram* LoudnessHistogram::Create() {
+  return new LoudnessHistogram;
+}
+
+LoudnessHistogram* LoudnessHistogram::Create(int window_size) {
+  if (window_size < 0)
+    return NULL;
+  return new LoudnessHistogram(window_size);
+}
+
+void LoudnessHistogram::Reset() {
+  // Reset the histogram, audio-content and number of updates.
+  memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
+  audio_content_q10_ = 0;
+  num_updates_ = 0;
+  // Empty the circular buffer.
+  buffer_index_ = 0;
+  buffer_is_full_ = false;
+  len_high_activity_ = 0;
+}
+
+int LoudnessHistogram::GetBinIndex(double rms) {
+  // First exclude overload cases.
+  if (rms <= kHistBinCenters[0]) {
+    return 0;
+  } else if (rms >= kHistBinCenters[kHistSize - 1]) {
+    return kHistSize - 1;
+  } else {
+    // The quantizer is uniform in log domain. Alternatively we could do binary
+    // search in linear domain.
+    double rms_log = log(rms);
+
+    int index = static_cast<int>(
+        floor((rms_log - kLogDomainMinBinCenter) * kLogDomainStepSizeInverse));
+    // The final decision is in linear domain.
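+    // The uniform log-domain quantizer can be off by one because the bin
+    // centers grow geometrically; comparing `rms` with the midpoint `b` of
+    // the two neighboring centers picks the closer bin in the linear domain.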
+    double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
+    if (rms > b) {
+      return index + 1;
+    }
+    return index;
+  }
+}
+
+double LoudnessHistogram::CurrentRms() const {
+  double p;
+  double mean_val = 0;
+  if (audio_content_q10_ > 0) {
+    double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
+    for (int n = 0; n < kHistSize; n++) {
+      p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
+      mean_val += p * kHistBinCenters[n];
+    }
+  } else {
+    mean_val = kHistBinCenters[0];
+  }
+  return mean_val;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h
new file mode 100644
index 0000000000..51b38714c2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_
+#define MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_
+
+#include <string.h>
+
+#include <memory>
+
+namespace webrtc {
+
+// This class implements the histogram of loudness with circular buffers so
+// that the histogram tracks the last T seconds of the loudness.
+class LoudnessHistogram {
+ public:
+  // Create a non-sliding LoudnessHistogram.
+  static LoudnessHistogram* Create();
+
+  // Create a sliding LoudnessHistogram, i.e. the histogram represents the last
+  // `window_size` samples.
+  static LoudnessHistogram* Create(int window_size);
+  ~LoudnessHistogram();
+
+  // Insert RMS and the corresponding activity probability.
+  void Update(double rms, double activity_probability);
+
+  // Reset the histogram, forget the past.
+  void Reset();
+
+  // Current loudness, which is actually the mean of histogram in loudness
+  // domain.
+  double CurrentRms() const;
+
+  // Sum of the histogram content.
+  double AudioContent() const;
+
+  // Number of times the histogram has been updated.
+  int num_updates() const { return num_updates_; }
+
+ private:
+  LoudnessHistogram();
+  explicit LoudnessHistogram(int window);
+
+  // Find the histogram bin associated with the given `rms`.
+  int GetBinIndex(double rms);
+
+  void RemoveOldestEntryAndUpdate();
+  void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
+  void UpdateHist(int activity_prob_q10, int hist_index);
+  void RemoveTransient();
+
+  // Number of histogram bins.
+  static const int kHistSize = 77;
+
+  // Number of times the histogram is updated.
+  int num_updates_;
+  // Audio content, this should be equal to the sum of the components of
+  // `bin_count_q10_`.
+  int64_t audio_content_q10_;
+
+  // LoudnessHistogram of input RMS in Q10 with `kHistSize` bins. In each
+  // `Update()`, we increment the associated histogram-bin with the given
+  // probability. The increment is implemented in Q10 to avoid rounding errors.
+  int64_t bin_count_q10_[kHistSize];
+
+  // Circular buffer for probabilities.
+  std::unique_ptr<int[]> activity_probability_;
+  // Circular buffer for histogram-indices of probabilities.
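+  // Together with `activity_probability_`, this forms a pair of parallel
+  // circular buffers of length `len_circular_buffer_`: slot i holds both the
+  // Q10 probability added for the i-th frame in the window and the histogram
+  // bin it went into, so exactly that amount can be subtracted again when the
+  // entry expires in RemoveOldestEntryAndUpdate().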
+  std::unique_ptr<int[]> hist_bin_index_;
+  // Current index of the circular buffer, where the newest data will be
+  // written to; when the buffer is full it therefore points at the oldest
+  // data.
+  int buffer_index_;
+  // Indicates whether the buffer is full and a wrap-around has occurred.
+  int buffer_is_full_;
+  // Size of circular buffer.
+  int len_circular_buffer_;
+  int len_high_activity_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc
new file mode 100644
index 0000000000..bbc0a7ee92
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Use CreateHistUnittestFile.m to generate the input file.
+
+#include "modules/audio_processing/agc/loudness_histogram.h"
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/agc/utility.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+struct InputOutput {
+  double rms;
+  double activity_probability;
+  double audio_content;
+  double loudness;
+};
+
+const double kRelativeErrTol = 1e-10;
+
+class LoudnessHistogramTest : public ::testing::Test {
+ protected:
+  void RunTest(bool enable_circular_buff, absl::string_view filename);
+
+ private:
+  void TestClean();
+  std::unique_ptr<LoudnessHistogram> hist_;
+};
+
+void LoudnessHistogramTest::TestClean() {
+  EXPECT_EQ(hist_->CurrentRms(), 7.59621091765857e-02);
+  EXPECT_EQ(hist_->AudioContent(), 0);
+  EXPECT_EQ(hist_->num_updates(), 0);
+}
+
+void LoudnessHistogramTest::RunTest(bool enable_circular_buff,
+                                    absl::string_view filename) {
+  FILE* in_file = fopen(std::string(filename).c_str(), "rb");
+  ASSERT_TRUE(in_file != NULL);
+  if (enable_circular_buff) {
+    int buffer_size;
+    EXPECT_EQ(fread(&buffer_size, sizeof(buffer_size), 1, in_file), 1u);
+    hist_.reset(LoudnessHistogram::Create(buffer_size));
+  } else {
+    hist_.reset(LoudnessHistogram::Create());
+  }
+  TestClean();
+
+  InputOutput io;
+  int num_updates = 0;
+  while (fread(&io, sizeof(InputOutput), 1, in_file) == 1) {
+    if (io.rms < 0) {
+      // We have to reset.
+      hist_->Reset();
+      TestClean();
+      num_updates = 0;
+      // Read the next chunk of input.
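+      // Each record is one packed `InputOutput` struct (four doubles: rms,
+      // activity_probability, audio_content, loudness); a record with
+      // rms < 0 is a sentinel that requests the Reset() performed above.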
+      if (fread(&io, sizeof(InputOutput), 1, in_file) != 1)
+        break;
+    }
+    hist_->Update(io.rms, io.activity_probability);
+    num_updates++;
+    EXPECT_EQ(hist_->num_updates(), num_updates);
+    double audio_content = hist_->AudioContent();
+
+    double abs_err =
+        std::min(audio_content, io.audio_content) * kRelativeErrTol;
+
+    ASSERT_NEAR(audio_content, io.audio_content, abs_err);
+    double current_loudness = Linear2Loudness(hist_->CurrentRms());
+    abs_err =
+        std::min(fabs(current_loudness), fabs(io.loudness)) * kRelativeErrTol;
+    ASSERT_NEAR(current_loudness, io.loudness, abs_err);
+  }
+  fclose(in_file);
+}
+
+TEST_F(LoudnessHistogramTest, ActiveCircularBuffer) {
+  RunTest(true, test::ResourcePath(
+                    "audio_processing/agc/agc_with_circular_buffer", "dat")
+                    .c_str());
+}
+
+TEST_F(LoudnessHistogramTest, InactiveCircularBuffer) {
+  RunTest(false, test::ResourcePath(
+                     "audio_processing/agc/agc_no_circular_buffer", "dat")
+                     .c_str());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h b/third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h
new file mode 100644
index 0000000000..3080e1563c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/mock_agc.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
+#define MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc/agc.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+class MockAgc : public Agc {
+ public:
+  virtual ~MockAgc() {}
+  MOCK_METHOD(void, Process, (rtc::ArrayView<const int16_t> audio),
+              (override));
+  MOCK_METHOD(bool, GetRmsErrorDb, (int* error), (override));
+  MOCK_METHOD(void, Reset, (), (override));
+  MOCK_METHOD(int, set_target_level_dbfs, (int level), (override));
+  MOCK_METHOD(int, target_level_dbfs, (), (const, override));
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/utility.cc b/third_party/libwebrtc/modules/audio_processing/agc/utility.cc
new file mode 100644
index 0000000000..2a87e5ce74
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/utility.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/utility.h"
+
+#include <math.h>
+
+namespace webrtc {
+
+static const double kLog10 = 2.30258509299;
+static const double kLinear2DbScale = 20.0 / kLog10;
+static const double kLinear2LoudnessScale = 13.4 / kLog10;
+
+double Loudness2Db(double loudness) {
+  return loudness * kLinear2DbScale / kLinear2LoudnessScale;
+}
+
+double Linear2Loudness(double rms) {
+  if (rms == 0)
+    return -15;
+  return kLinear2LoudnessScale * log(rms);
+}
+
+double Db2Loudness(double db) {
+  return db * kLinear2LoudnessScale / kLinear2DbScale;
+}
+
+double Dbfs2Loudness(double dbfs) {
+  return Db2Loudness(90 + dbfs);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc/utility.h b/third_party/libwebrtc/modules/audio_processing/agc/utility.h
new file mode 100644
index 0000000000..56eec244a7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc/utility.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
+#define MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
+
+namespace webrtc {
+
+// TODO(turajs): Add description of function.
+double Loudness2Db(double loudness);
+
+double Linear2Loudness(double rms);
+
+double Db2Loudness(double db);
+
+double Dbfs2Loudness(double dbfs);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn
new file mode 100644
index 0000000000..bd59ad3dae
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn
@@ -0,0 +1,511 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+ +import("../../../webrtc.gni") + +rtc_library("speech_level_estimator") { + sources = [ + "speech_level_estimator.cc", + "speech_level_estimator.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + "..:api", + "..:apm_logging", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + ] +} + +rtc_library("adaptive_digital_gain_controller") { + sources = [ + "adaptive_digital_gain_controller.cc", + "adaptive_digital_gain_controller.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + ":gain_applier", + "..:api", + "..:apm_logging", + "..:audio_frame_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + "../../../system_wrappers:metrics", + ] +} + +rtc_library("saturation_protector") { + sources = [ + "saturation_protector.cc", + "saturation_protector.h", + "saturation_protector_buffer.cc", + "saturation_protector_buffer.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + "..:apm_logging", + "../../../rtc_base:checks", + "../../../rtc_base:safe_compare", + "../../../rtc_base:safe_minmax", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("biquad_filter") { + visibility = [ "./*" ] + sources = [ + "biquad_filter.cc", + "biquad_filter.h", + ] + deps = [ + "../../../api:array_view", + "../../../rtc_base:macromagic", + ] +} + +rtc_library("clipping_predictor") { + visibility = [ + "../agc:agc", + "./*", + ] + + sources = [ + "clipping_predictor.cc", + "clipping_predictor.h", + "clipping_predictor_level_buffer.cc", + "clipping_predictor_level_buffer.h", + ] + + deps = [ + ":gain_map", + "..:api", + "..:audio_frame_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_source_set("common") { + sources = [ "agc2_common.h" ] +} + +rtc_library("fixed_digital") { + sources = [ + "fixed_digital_level_estimator.cc", + "fixed_digital_level_estimator.h", + "interpolated_gain_curve.cc", + "interpolated_gain_curve.h", + "limiter.cc", + "limiter.h", + ] + + visibility = [ + "..:gain_controller2", + "../../audio_mixer:audio_mixer_impl", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:gtest_prod", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_library("gain_applier") { + sources = [ + "gain_applier.cc", + "gain_applier.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + deps = [ + ":common", + "..:audio_frame_view", + "../../../api:array_view", + "../../../rtc_base:safe_minmax", + ] +} + +rtc_source_set("gain_map") { + visibility = [ + "..:analog_mic_simulation", + "../agc:agc", + "./*", + ] + + sources = [ "gain_map_internal.h" ] +} + +rtc_library("input_volume_controller") { + sources = [ + "input_volume_controller.cc", + "input_volume_controller.h", + 
"speech_probability_buffer.cc", + "speech_probability_buffer.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":clipping_predictor", + ":gain_map", + ":input_volume_stats_reporter", + "..:api", + "..:audio_buffer", + "..:audio_frame_view", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:checks", + "../../../rtc_base:gtest_prod", + "../../../rtc_base:gtest_prod", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + "../../../system_wrappers:field_trial", + "../../../system_wrappers:metrics", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("noise_level_estimator") { + sources = [ + "noise_level_estimator.cc", + "noise_level_estimator.h", + ] + deps = [ + ":biquad_filter", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../system_wrappers", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] +} + +rtc_library("vad_wrapper") { + sources = [ + "vad_wrapper.cc", + "vad_wrapper.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":common", + ":cpu_features", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "rnn_vad", + "rnn_vad:rnn_vad_common", + ] +} + +rtc_library("cpu_features") { + sources = [ + "cpu_features.cc", + "cpu_features.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + deps = [ + "../../../rtc_base:stringutils", + "../../../rtc_base/system:arch", + "../../../system_wrappers", + ] +} + +rtc_library("speech_level_estimator_unittest") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "speech_level_estimator_unittest.cc" ] + deps = [ + ":common", + ":speech_level_estimator", + "..:api", + "..:apm_logging", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + +rtc_library("adaptive_digital_gain_controller_unittest") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "adaptive_digital_gain_controller_unittest.cc" ] + + deps = [ + ":adaptive_digital_gain_controller", + ":common", + ":test_utils", + "..:api", + "..:apm_logging", + "..:audio_frame_view", + "../../../common_audio", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + +rtc_library("gain_applier_unittest") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "gain_applier_unittest.cc" ] + deps = [ + ":gain_applier", + ":test_utils", + "..:audio_frame_view", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + +rtc_library("saturation_protector_unittest") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ + "saturation_protector_buffer_unittest.cc", + "saturation_protector_unittest.cc", + ] + deps = [ + ":common", + ":saturation_protector", + "..:apm_logging", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + +rtc_library("biquad_filter_unittests") { + testonly = true + sources = [ "biquad_filter_unittest.cc" ] + deps = [ + ":biquad_filter", + "../../../rtc_base:gunit_helpers", + ] +} + +rtc_library("fixed_digital_unittests") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = 
[ + "agc2_testing_common_unittest.cc", + "compute_interpolated_gain_curve.cc", + "compute_interpolated_gain_curve.h", + "fixed_digital_level_estimator_unittest.cc", + "interpolated_gain_curve_unittest.cc", + "limiter_db_gain_curve.cc", + "limiter_db_gain_curve.h", + "limiter_db_gain_curve_unittest.cc", + "limiter_unittest.cc", + ] + deps = [ + ":common", + ":fixed_digital", + ":test_utils", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + "../../../system_wrappers:metrics", + ] +} + +rtc_library("input_volume_controller_unittests") { + testonly = true + sources = [ + "clipping_predictor_level_buffer_unittest.cc", + "clipping_predictor_unittest.cc", + "input_volume_controller_unittest.cc", + "speech_probability_buffer_unittest.cc", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":clipping_predictor", + ":gain_map", + ":input_volume_controller", + "..:api", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:random", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + "../../../test:field_trial", + "../../../test:fileutils", + "../../../test:test_support", + "//testing/gtest", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("noise_estimator_unittests") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "noise_level_estimator_unittest.cc" ] + deps = [ + ":noise_level_estimator", + ":test_utils", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../api:function_view", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + ] +} + +rtc_library("vad_wrapper_unittests") { + testonly = true + sources = [ "vad_wrapper_unittest.cc" ] + deps = [ + ":common", + ":vad_wrapper", + "..:audio_frame_view", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + "../../../rtc_base:safe_compare", + "../../../test:test_support", + ] +} + +rtc_library("test_utils") { + testonly = true + visibility = [ + ":*", + "..:audio_processing_unittests", + ] + sources = [ + "agc2_testing_common.cc", + "agc2_testing_common.h", + "vector_float_frame.cc", + "vector_float_frame.h", + ] + deps = [ + "..:audio_frame_view", + "../../../rtc_base:checks", + "../../../rtc_base:random", + ] +} + +rtc_library("input_volume_stats_reporter") { + sources = [ + "input_volume_stats_reporter.cc", + "input_volume_stats_reporter.h", + ] + deps = [ + "../../../rtc_base:gtest_prod", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("input_volume_stats_reporter_unittests") { + testonly = true + sources = [ "input_volume_stats_reporter_unittest.cc" ] + deps = [ + ":input_volume_stats_reporter", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + "../../../test:test_support", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc new file mode 100644 index 0000000000..e8edab602c --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
+
+#include <algorithm>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+
+using AdaptiveDigitalConfig =
+    AudioProcessing::Config::GainController2::AdaptiveDigital;
+
+constexpr int kHeadroomHistogramMin = 0;
+constexpr int kHeadroomHistogramMax = 50;
+constexpr int kGainDbHistogramMax = 30;
+
+// Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`.
+// Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a
+// safety margin to allow transient peaks to exceed the target peak level
+// without clipping.
+float ComputeGainDb(float input_level_dbfs,
+                    const AdaptiveDigitalConfig& config) {
+  // If the level is very low, apply the maximum gain.
+  if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) {
+    return config.max_gain_db;
+  }
+  // We expect to end up here most of the time: the level is below
+  // -headroom, but we can boost it to -headroom.
+  if (input_level_dbfs < -config.headroom_db) {
+    return -config.headroom_db - input_level_dbfs;
+  }
+  // The level is too high and we can't boost.
+  RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db);
+  return 0.0f;
+}
+
+// Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs`
+// does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns
+// `target_gain_db` so that the output noise level equals
+// `max_output_noise_level_dbfs`.
+float LimitGainByNoise(float target_gain_db,
+                       float input_noise_level_dbfs,
+                       float max_output_noise_level_dbfs,
+                       ApmDataDumper& apm_data_dumper) {
+  const float max_allowed_gain_db =
+      max_output_noise_level_dbfs - input_noise_level_dbfs;
+  apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
+                          max_allowed_gain_db);
+  return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f));
+}
+
+float LimitGainByLowConfidence(float target_gain_db,
+                               float last_gain_db,
+                               float limiter_audio_level_dbfs,
+                               bool estimate_is_confident) {
+  if (estimate_is_confident ||
+      limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
+    return target_gain_db;
+  }
+  const float limiter_level_dbfs_before_gain =
+      limiter_audio_level_dbfs - last_gain_db;
+
+  // Compute a new gain so that `limiter_level_dbfs_before_gain` +
+  // `new_target_gain_db` is not greater than
+  // `kLimiterThresholdForAgcGainDbfs`.
+  const float new_target_gain_db = std::max(
+      kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f);
+  return std::min(new_target_gain_db, target_gain_db);
+}
+
+// Computes how the gain should change during this frame.
+// Returns the gain difference in dB relative to `last_gain_db`.
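+// For example (illustrative numbers, not the WebRTC defaults): with a target
+// 5 dB above the last gain and 0.3 dB per-frame limits, the function returns
+// +0.3 dB when gain increases are allowed and 0 dB when they are not, since
+// a positive difference is first clamped to <= 0 in the latter case.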
+float ComputeGainChangeThisFrameDb(float target_gain_db,
+                                   float last_gain_db,
+                                   bool gain_increase_allowed,
+                                   float max_gain_decrease_db,
+                                   float max_gain_increase_db) {
+  RTC_DCHECK_GT(max_gain_decrease_db, 0);
+  RTC_DCHECK_GT(max_gain_increase_db, 0);
+  float target_gain_difference_db = target_gain_db - last_gain_db;
+  if (!gain_increase_allowed) {
+    target_gain_difference_db = std::min(target_gain_difference_db, 0.0f);
+  }
+  return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
+                        max_gain_increase_db);
+}
+
+}  // namespace
+
+AdaptiveDigitalGainController::AdaptiveDigitalGainController(
+    ApmDataDumper* apm_data_dumper,
+    const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+    int adjacent_speech_frames_threshold)
+    : apm_data_dumper_(apm_data_dumper),
+      gain_applier_(
+          /*hard_clip_samples=*/false,
+          /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
+      config_(config),
+      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
+      max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
+                                   kFrameDurationMs / 1000.0f),
+      calls_since_last_gain_log_(0),
+      frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold),
+      last_gain_db_(config_.initial_gain_db) {
+  RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
+  RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
+  RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
+  RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
+}
+
+void AdaptiveDigitalGainController::Process(const FrameInfo& info,
+                                            AudioFrameView<float> frame) {
+  RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
+  RTC_DCHECK_GE(frame.num_channels(), 1);
+  RTC_DCHECK(frame.samples_per_channel() == 80 ||
+             frame.samples_per_channel() == 160 ||
+             frame.samples_per_channel() == 320 ||
+             frame.samples_per_channel() == 480)
+      << "`frame` does not look like a 10 ms frame for an APM supported sample "
+         "rate";
+
+  // Compute the input level used to select the desired gain.
+  RTC_DCHECK_GT(info.headroom_db, 0.0f);
+  const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
+
+  const float target_gain_db = LimitGainByLowConfidence(
+      LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_),
+                       info.noise_rms_dbfs,
+                       config_.max_output_noise_level_dbfs, *apm_data_dumper_),
+      last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
+
+  // Forbid increasing the gain until enough adjacent speech frames are
+  // observed.
+  bool first_confident_speech_frame = false;
+  if (info.speech_probability < kVadConfidenceThreshold) {
+    frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
+  } else if (frames_to_gain_increase_allowed_ > 0) {
+    frames_to_gain_increase_allowed_--;
+    first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
+  }
+  apm_data_dumper_->DumpRaw(
+      "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
+      frames_to_gain_increase_allowed_);
+
+  const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;
+
+  float max_gain_increase_db = max_gain_change_db_per_10ms_;
+  if (first_confident_speech_frame) {
+    // No gain increase happened while waiting for a long enough speech
+    // sequence. Therefore, temporarily allow a faster gain increase.
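+    // Scaling the per-frame cap by `adjacent_speech_frames_threshold_`
+    // compensates for the wait: this frame the gain may rise by roughly as
+    // much as it could have risen over the whole observed speech sequence.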
+    RTC_DCHECK(gain_increase_allowed);
+    max_gain_increase_db *= adjacent_speech_frames_threshold_;
+  }
+
+  const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
+      target_gain_db, last_gain_db_, gain_increase_allowed,
+      /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
+      max_gain_increase_db);
+
+  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
+                            target_gain_db - last_gain_db_);
+  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
+                            gain_change_this_frame_db);
+
+  // Optimization: avoid calling math functions if gain does not change.
+  if (gain_change_this_frame_db != 0.f) {
+    gain_applier_.SetGainFactor(
+        DbToRatio(last_gain_db_ + gain_change_this_frame_db));
+  }
+
+  gain_applier_.ApplyGain(frame);
+
+  // Remember that the gain has changed for the next iteration.
+  last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
+  apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
+                            last_gain_db_);
+
+  // Log every 10 seconds.
+  calls_since_last_gain_log_++;
+  if (calls_since_last_gain_log_ == 1000) {
+    calls_since_last_gain_log_ = 0;
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
+                                -info.speech_level_dbfs, 0, 100, 101);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
+                                -info.noise_rms_dbfs, 0, 100, 101);
+    RTC_HISTOGRAM_COUNTS_LINEAR(
+        "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
+        kHeadroomHistogramMax,
+        kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
+                                last_gain_db_, 0, kGainDbHistogramMax,
+                                kGainDbHistogramMax + 1);
+    RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
+                     << " | speech_dbfs: " << info.speech_level_dbfs
+                     << " | noise_dbfs: " << info.noise_rms_dbfs
+                     << " | headroom_db: " << info.headroom_db
+                     << " | gain_db: " << last_gain_db_;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
new file mode 100644
index 0000000000..01335e79db
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
+
+#include <vector>
+
+#include "modules/audio_processing/agc2/gain_applier.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+// Selects the target digital gain, decides when and how quickly to adapt to
+// the target and applies the current gain to 10 ms frames.
+class AdaptiveDigitalGainController {
+ public:
+  // Information about a frame to process.
+  struct FrameInfo {
+    float speech_probability;  // Probability of speech in the [0, 1] range.
+    float speech_level_dbfs;   // Estimated speech level (dBFS).
+    bool speech_level_reliable;  // True with reliable speech level estimation.
+    float noise_rms_dbfs;        // Estimated noise RMS level (dBFS).
+    float headroom_db;           // Headroom (dB).
+    // TODO(bugs.webrtc.org/7494): Remove `limiter_envelope_dbfs`.
+    float limiter_envelope_dbfs;  // Envelope level from the limiter (dBFS).
+  };
+
+  AdaptiveDigitalGainController(
+      ApmDataDumper* apm_data_dumper,
+      const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+      int adjacent_speech_frames_threshold);
+  AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete;
+  AdaptiveDigitalGainController& operator=(
+      const AdaptiveDigitalGainController&) = delete;
+
+  // Analyzes `info`, updates the digital gain and applies it to a 10 ms
+  // `frame`. Supports any sample rate supported by APM.
+  void Process(const FrameInfo& info, AudioFrameView<float> frame);
+
+ private:
+  ApmDataDumper* const apm_data_dumper_;
+  GainApplier gain_applier_;
+
+  const AudioProcessing::Config::GainController2::AdaptiveDigital config_;
+  const int adjacent_speech_frames_threshold_;
+  const float max_gain_change_db_per_10ms_;
+
+  int calls_since_last_gain_log_;
+  int frames_to_gain_increase_allowed_;
+  float last_gain_db_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_gn/moz.build
new file mode 100644
index 0000000000..7d16c9a9f5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + 
+ OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("adaptive_digital_gain_controller_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc new file mode 100644 index 0000000000..e95cbb5067 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kMono = 1;
+constexpr int kStereo = 2;
+constexpr int kFrameLen10ms8kHz = 80;
+constexpr int kFrameLen10ms48kHz = 480;
+
+constexpr float kMaxSpeechProbability = 1.0f;
+
+// Constants used in place of estimated noise levels.
+constexpr float kNoNoiseDbfs = kMinLevelDbfs;
+constexpr float kWithNoiseDbfs = -20.0f;
+
+// Number of additional frames to process in the tests to ensure that the
+// tested adaptation processes have converged.
+constexpr int kNumExtraFrames = 10;
+
+constexpr float GetMaxGainChangePerFrameDb(
+    float max_gain_change_db_per_second) {
+  return max_gain_change_db_per_second * kFrameDurationMs / 1000.0f;
+}
+
+using AdaptiveDigitalConfig =
+    AudioProcessing::Config::GainController2::AdaptiveDigital;
+
+constexpr AdaptiveDigitalConfig kDefaultConfig{};
+
+// Helper to create initialized `AdaptiveDigitalGainController` objects.
+struct GainApplierHelper {
+  GainApplierHelper(const AdaptiveDigitalConfig& config,
+                    int adjacent_speech_frames_threshold)
+      : apm_data_dumper(0),
+        gain_applier(std::make_unique<AdaptiveDigitalGainController>(
+            &apm_data_dumper,
+            config,
+            adjacent_speech_frames_threshold)) {}
+  ApmDataDumper apm_data_dumper;
+  std::unique_ptr<AdaptiveDigitalGainController> gain_applier;
+};
+
+// Returns a `FrameInfo` sample to simulate noiseless speech detected with
+// maximum probability and with level, headroom and limiter envelope chosen
+// so that the resulting gain equals the default initial adaptive digital gain
+// i.e., no gain adaptation is expected.
+AdaptiveDigitalGainController::FrameInfo GetFrameInfoToNotAdapt(
+    const AdaptiveDigitalConfig& config) {
+  AdaptiveDigitalGainController::FrameInfo info;
+  info.speech_probability = kMaxSpeechProbability;
+  info.speech_level_dbfs = -config.initial_gain_db - config.headroom_db;
+  info.speech_level_reliable = true;
+  info.noise_rms_dbfs = kNoNoiseDbfs;
+  info.headroom_db = config.headroom_db;
+  info.limiter_envelope_dbfs = -2.0f;
+  return info;
+}
+
+TEST(GainController2AdaptiveDigitalGainControllerTest,
+     GainApplierShouldNotCrash) {
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold);
+  // Make one call with reasonable audio level values and settings.
+  VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
+  helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
+                               fake_audio.float_frame_view());
+}
+
+// Checks that the maximum allowed gain is applied.
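+// The frame budget below is the maximum gain divided by the per-frame gain
+// step, plus `kNumExtraFrames` of slack; e.g., assuming a 30 dB maximum gain
+// and a 3 dB/s ramp (0.03 dB per 10 ms frame), convergence takes about
+// 1000 frames, i.e. 10 s of audio.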
+TEST(GainController2AdaptiveDigitalGainControllerTest, MaxGainApplied) { + constexpr int kNumFramesToAdapt = + static_cast(kDefaultConfig.max_gain_db / + GetMaxGainChangePerFrameDb( + kDefaultConfig.max_gain_change_db_per_second)) + + kNumExtraFrames; + + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold); + AdaptiveDigitalGainController::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = -60.0f; + float applied_gain; + for (int i = 0; i < kNumFramesToAdapt; ++i) { + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f); + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + applied_gain = fake_audio.float_frame_view().channel(0)[0]; + } + const float applied_gain_db = 20.0f * std::log10f(applied_gain); + EXPECT_NEAR(applied_gain_db, kDefaultConfig.max_gain_db, 0.1f); +} + +TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) { + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold); + + constexpr float initial_level_dbfs = -25.0f; + constexpr float kMaxGainChangeDbPerFrame = + GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second); + constexpr int kNumFramesToAdapt = + static_cast(initial_level_dbfs / kMaxGainChangeDbPerFrame) + + kNumExtraFrames; + + const float max_change_per_frame_linear = DbToRatio(kMaxGainChangeDbPerFrame); + + float last_gain_linear = 1.f; + for (int i = 0; i < kNumFramesToAdapt; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f); + AdaptiveDigitalGainController::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = initial_level_dbfs; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; + EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), + max_change_per_frame_linear); + last_gain_linear = current_gain_linear; + } + + // Check that the same is true when gain decreases as well. 
+ for (int i = 0; i < kNumFramesToAdapt; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f); + AdaptiveDigitalGainController::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = 0.f; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; + EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), + max_change_per_frame_linear); + last_gain_linear = current_gain_linear; + } +} + +TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) { + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold); + + constexpr float initial_level_dbfs = -25.0f; + + VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f); + AdaptiveDigitalGainController::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = initial_level_dbfs; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + float maximal_difference = 0.0f; + float current_value = 1.0f * DbToRatio(kDefaultConfig.initial_gain_db); + for (const auto& x : fake_audio.float_frame_view().channel(0)) { + const float difference = std::abs(x - current_value); + maximal_difference = std::max(maximal_difference, difference); + current_value = x; + } + + const float max_change_per_frame_linear = DbToRatio( + GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second)); + const float max_change_per_sample = + max_change_per_frame_linear / kFrameLen10ms48kHz; + + EXPECT_LE(maximal_difference, max_change_per_sample); +} + +TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) { + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold); + + constexpr float initial_level_dbfs = -25.0f; + constexpr int num_initial_frames = + kDefaultConfig.initial_gain_db / + GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second); + constexpr int num_frames = 50; + + ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs) + << "kWithNoiseDbfs is too low"; + + for (int i = 0; i < num_initial_frames + num_frames; ++i) { + VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f); + AdaptiveDigitalGainController::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = initial_level_dbfs; + info.noise_rms_dbfs = kWithNoiseDbfs; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + + // Wait so that the adaptive gain applier has time to lower the gain. + if (i > num_initial_frames) { + const float maximal_ratio = + *std::max_element(fake_audio.float_frame_view().channel(0).begin(), + fake_audio.float_frame_view().channel(0).end()); + + EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f); + } + } +} + +TEST(GainController2AdaptiveDigitalGainControllerTest, + CanHandlePositiveSpeechLevels) { + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold); + + // Make one call with positive audio level values and settings. 
+  VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
+  AdaptiveDigitalGainController::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
+  info.speech_level_dbfs = 5.0f;
+  helper.gain_applier->Process(info, fake_audio.float_frame_view());
+}
+
+TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) {
+  GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold);
+
+  constexpr float initial_level_dbfs = -25.0f;
+  constexpr int num_initial_frames =
+      kDefaultConfig.initial_gain_db /
+      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
+  constexpr int num_frames = 50;
+
+  ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
+      << "kWithNoiseDbfs is too low";
+
+  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
+    AdaptiveDigitalGainController::FrameInfo info =
+        GetFrameInfoToNotAdapt(kDefaultConfig);
+    info.speech_level_dbfs = initial_level_dbfs;
+    info.limiter_envelope_dbfs = 1.0f;
+    info.speech_level_reliable = false;
+    helper.gain_applier->Process(info, fake_audio.float_frame_view());
+
+    // Wait so that the adaptive gain applier has time to lower the gain.
+    if (i > num_initial_frames) {
+      const float maximal_ratio =
+          *std::max_element(fake_audio.float_frame_view().channel(0).begin(),
+                            fake_audio.float_frame_view().channel(0).end());
+
+      EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
+    }
+  }
+}
+
+class AdaptiveDigitalGainControllerParametrizedTest
+    : public ::testing::TestWithParam<int> {
+ protected:
+  int adjacent_speech_frames_threshold() const { return GetParam(); }
+};
+
+TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
+       DoNotIncreaseGainWithTooFewSpeechFrames) {
+  GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold());
+
+  // Lower the speech level so that the target gain will be increased.
+  AdaptiveDigitalGainController::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
+  info.speech_level_dbfs -= 12.0f;
+
+  float prev_gain = 0.0f;
+  for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
+    SCOPED_TRACE(i);
+    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
+    helper.gain_applier->Process(info, audio.float_frame_view());
+    const float gain = audio.float_frame_view().channel(0)[0];
+    if (i > 0) {
+      EXPECT_EQ(prev_gain, gain);  // No gain increase applied.
+    }
+    prev_gain = gain;
+  }
+}
+
+TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
+       IncreaseGainWithEnoughSpeechFrames) {
+  GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold());
+
+  // Lower the speech level so that the target gain will be increased.
+  AdaptiveDigitalGainController::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
+  info.speech_level_dbfs -= 12.0f;
+
+  float prev_gain = 0.0f;
+  for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
+    SCOPED_TRACE(i);
+    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
+    helper.gain_applier->Process(info, audio.float_frame_view());
+    prev_gain = audio.float_frame_view().channel(0)[0];
+  }
+
+  // Process one more speech frame.
+  VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
+  helper.gain_applier->Process(info, audio.float_frame_view());
+
+  // An increased gain has been applied.
+ EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain); +} + +INSTANTIATE_TEST_SUITE_P( + GainController2, + AdaptiveDigitalGainControllerParametrizedTest, + ::testing::Values(1, 7, 31, kAdjacentSpeechFramesThreshold)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h new file mode 100644 index 0000000000..4597bcd015 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_ + +namespace webrtc { + +constexpr float kMinFloatS16Value = -32768.0f; +constexpr float kMaxFloatS16Value = 32767.0f; +constexpr float kMaxAbsFloatS16Value = 32768.0f; + +// Minimum audio level in dBFS scale for S16 samples. +constexpr float kMinLevelDbfs = -90.31f; + +constexpr int kFrameDurationMs = 10; +constexpr int kSubFramesInFrame = 20; +constexpr int kMaximalNumberOfSamplesPerChannel = 480; + +// Adaptive digital gain applier settings. + +// At what limiter levels should we start decreasing the adaptive digital gain. +constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f; + +// Number of milliseconds to wait to periodically reset the VAD. +constexpr int kVadResetPeriodMs = 1500; + +// Speech probability threshold to detect speech activity. +constexpr float kVadConfidenceThreshold = 0.95f; + +// Minimum number of adjacent speech frames having a sufficiently high speech +// probability to reliably detect speech activity. +constexpr int kAdjacentSpeechFramesThreshold = 12; + +// Number of milliseconds of speech frames to observe to make the estimator +// confident. +constexpr float kLevelEstimatorTimeToConfidenceMs = 400; +constexpr float kLevelEstimatorLeakFactor = + 1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs; + +// Saturation Protector settings. +constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f; +constexpr int kSaturationProtectorBufferSize = 4; + +// Number of interpolation points for each region of the limiter. +// These values have been tuned to limit the interpolated gain curve error given +// the limiter parameters and allowing a maximum error of +/- 32768^-1. +constexpr int kInterpolatedGainCurveKneePoints = 22; +constexpr int kInterpolatedGainCurveBeyondKneePoints = 10; +constexpr int kInterpolatedGainCurveTotalPoints = + kInterpolatedGainCurveKneePoints + kInterpolatedGainCurveBeyondKneePoints; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc new file mode 100644 index 0000000000..125e551b72 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+
+#include <cmath>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+std::vector<double> LinSpace(double l, double r, int num_points) {
+  RTC_CHECK_GE(num_points, 2);
+  std::vector<double> points(num_points);
+  const double step = (r - l) / (num_points - 1.0);
+  points[0] = l;
+  for (int i = 1; i < num_points - 1; i++) {
+    points[i] = static_cast<double>(l) + i * step;
+  }
+  points[num_points - 1] = r;
+  return points;
+}
+
+WhiteNoiseGenerator::WhiteNoiseGenerator(int min_amplitude, int max_amplitude)
+    : rand_gen_(42),
+      min_amplitude_(min_amplitude),
+      max_amplitude_(max_amplitude) {
+  RTC_DCHECK_LT(min_amplitude_, max_amplitude_);
+  RTC_DCHECK_LE(kMinS16, min_amplitude_);
+  RTC_DCHECK_LE(min_amplitude_, kMaxS16);
+  RTC_DCHECK_LE(kMinS16, max_amplitude_);
+  RTC_DCHECK_LE(max_amplitude_, kMaxS16);
+}
+
+float WhiteNoiseGenerator::operator()() {
+  return static_cast<float>(rand_gen_.Rand(min_amplitude_, max_amplitude_));
+}
+
+SineGenerator::SineGenerator(float amplitude,
+                             float frequency_hz,
+                             int sample_rate_hz)
+    : amplitude_(amplitude),
+      frequency_hz_(frequency_hz),
+      sample_rate_hz_(sample_rate_hz),
+      x_radians_(0.0f) {
+  RTC_DCHECK_GT(amplitude_, 0);
+  RTC_DCHECK_LE(amplitude_, kMaxS16);
+}
+
+float SineGenerator::operator()() {
+  constexpr float kPi = 3.1415926536f;
+  x_radians_ += frequency_hz_ / sample_rate_hz_ * 2 * kPi;
+  if (x_radians_ >= 2 * kPi) {
+    x_radians_ -= 2 * kPi;
+  }
+  return amplitude_ * std::sinf(x_radians_);
+}
+
+PulseGenerator::PulseGenerator(float pulse_amplitude,
+                               float no_pulse_amplitude,
+                               float frequency_hz,
+                               int sample_rate_hz)
+    : pulse_amplitude_(pulse_amplitude),
+      no_pulse_amplitude_(no_pulse_amplitude),
+      samples_period_(
+          static_cast<int>(static_cast<float>(sample_rate_hz) / frequency_hz)),
+      sample_counter_(0) {
+  RTC_DCHECK_GE(pulse_amplitude_, kMinS16);
+  RTC_DCHECK_LE(pulse_amplitude_, kMaxS16);
+  RTC_DCHECK_GT(no_pulse_amplitude_, kMinS16);
+  RTC_DCHECK_LE(no_pulse_amplitude_, kMaxS16);
+  RTC_DCHECK_GT(sample_rate_hz, frequency_hz);
+}
+
+float PulseGenerator::operator()() {
+  sample_counter_++;
+  if (sample_counter_ >= samples_period_) {
+    sample_counter_ -= samples_period_;
+  }
+  return static_cast<float>(sample_counter_ == 0 ? pulse_amplitude_
+                                                 : no_pulse_amplitude_);
+}
+
+} // namespace test
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h
new file mode 100644
index 0000000000..afed97e83b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
+
+#include <limits>
+#include <vector>
+
+#include "rtc_base/random.h"
+
+namespace webrtc {
+namespace test {
+
+constexpr float kMinS16 =
+    static_cast<float>(std::numeric_limits<int16_t>::min());
+constexpr float kMaxS16 =
+    static_cast<float>(std::numeric_limits<int16_t>::max());
+
+// Level Estimator test parameters.
+constexpr float kDecayMs = 20.0f;
+
+// Limiter parameters.
+constexpr float kLimiterMaxInputLevelDbFs = 1.f;
+constexpr float kLimiterKneeSmoothnessDb = 1.f;
+constexpr float kLimiterCompressionRatio = 5.f;
+
+// Returns evenly spaced `num_points` numbers over a specified interval [l, r].
+std::vector<double> LinSpace(double l, double r, int num_points);
+
+// Generates white noise.
+class WhiteNoiseGenerator {
+ public:
+  WhiteNoiseGenerator(int min_amplitude, int max_amplitude);
+  float operator()();
+
+ private:
+  Random rand_gen_;
+  const int min_amplitude_;
+  const int max_amplitude_;
+};
+
+// Generates a sine function.
+class SineGenerator {
+ public:
+  SineGenerator(float amplitude, float frequency_hz, int sample_rate_hz);
+  float operator()();
+
+ private:
+  const float amplitude_;
+  const float frequency_hz_;
+  const int sample_rate_hz_;
+  float x_radians_;
+};
+
+// Generates periodic pulses.
+class PulseGenerator {
+ public:
+  PulseGenerator(float pulse_amplitude,
+                 float no_pulse_amplitude,
+                 float frequency_hz,
+                 int sample_rate_hz);
+  float operator()();
+
+ private:
+  const float pulse_amplitude_;
+  const float no_pulse_amplitude_;
+  const int samples_period_;
+  int sample_counter_;
+};
+
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc
new file mode 100644
index 0000000000..79c3cc95d9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+
+TEST(GainController2TestingCommon, LinSpace) {
+  std::vector<double> points1 = test::LinSpace(-1.0, 2.0, 4);
+  const std::vector<double> expected_points1{{-1.0, 0.0, 1.0, 2.0}};
+  EXPECT_EQ(expected_points1, points1);
+
+  std::vector<double> points2 = test::LinSpace(0.0, 1.0, 4);
+  const std::vector<double> expected_points2{{0.0, 1.0 / 3.0, 2.0 / 3.0, 1.0}};
+  EXPECT_EQ(points2, expected_points2);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc
new file mode 100644
index 0000000000..c1b80d7320
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree.
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/biquad_filter.h"
+
+#include "rtc_base/arraysize.h"
+
+namespace webrtc {
+
+BiQuadFilter::BiQuadFilter(const Config& config)
+    : config_(config), state_({}) {}
+
+BiQuadFilter::~BiQuadFilter() = default;
+
+void BiQuadFilter::SetConfig(const Config& config) {
+  config_ = config;
+  state_ = {};
+}
+
+void BiQuadFilter::Reset() {
+  state_ = {};
+}
+
+void BiQuadFilter::Process(rtc::ArrayView<const float> x,
+                           rtc::ArrayView<float> y) {
+  RTC_DCHECK_EQ(x.size(), y.size());
+  const float config_a0 = config_.a[0];
+  const float config_a1 = config_.a[1];
+  const float config_b0 = config_.b[0];
+  const float config_b1 = config_.b[1];
+  const float config_b2 = config_.b[2];
+  float state_a0 = state_.a[0];
+  float state_a1 = state_.a[1];
+  float state_b0 = state_.b[0];
+  float state_b1 = state_.b[1];
+  for (size_t k = 0, x_size = x.size(); k < x_size; ++k) {
+    // Use a temporary variable for `x[k]` to allow in-place processing.
+    const float tmp = x[k];
+    float y_k = config_b0 * tmp + config_b1 * state_b0 + config_b2 * state_b1 -
+                config_a0 * state_a0 - config_a1 * state_a1;
+    state_b1 = state_b0;
+    state_b0 = tmp;
+    state_a1 = state_a0;
+    state_a0 = y_k;
+    y[k] = y_k;
+  }
+  state_.a[0] = state_a0;
+  state_.a[1] = state_a1;
+  state_.b[0] = state_b0;
+  state_.b[1] = state_b1;
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h
new file mode 100644
index 0000000000..5273ff9386
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_BIQUAD_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_BIQUAD_FILTER_H_
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Transposed direct form I implementation of a bi-quad filter.
+//        b[0] + b[1] • z^(-1) + b[2] • z^(-2)
+// H(z) = ------------------------------------
+//          1 + a[1] • z^(-1) + a[2] • z^(-2)
class BiQuadFilter {
+ public:
+  // Normalized filter coefficients.
+  // Computed as `[b, a] = scipy.signal.butter(N=2, Wn, btype)`.
+  struct Config {
+    float b[3];  // b[0], b[1], b[2].
+    float a[2];  // a[1], a[2].
+  };
+
+  explicit BiQuadFilter(const Config& config);
+  BiQuadFilter(const BiQuadFilter&) = delete;
+  BiQuadFilter& operator=(const BiQuadFilter&) = delete;
+  ~BiQuadFilter();
+
+  // Sets the filter configuration and resets the internal state.
+  void SetConfig(const Config& config);
+
+  // Zeroes the filter state.
+  void Reset();
+
+  // Filters `x` and writes the output in `y`, which must have the same length
+  // as `x`. In-place processing is supported.
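+  //
+  // Illustrative usage sketch (not part of the upstream documentation; the
+  // coefficient values would come from e.g. `scipy.signal.butter`):
+  //   BiQuadFilter filter(config);
+  //   std::array<float, 480> frame{};  // One 10 ms channel at 48 kHz.
+  //   filter.Process(frame, frame);    // In-place filtering is allowed.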
+  void Process(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
+
+ private:
+  Config config_;
+  struct State {
+    float b[2];
+    float a[2];
+  } state_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_BIQUAD_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build
new file mode 100644
index 0000000000..f396f42e57
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_X11"] = "1"
+    DEFINES["WEBRTC_BSD"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"]
= True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("biquad_filter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc new file mode 100644 index 0000000000..a53036b08e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/biquad_filter.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+// #include "test/fpe_observer.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kFrameSize = 8;
+constexpr int kNumFrames = 4;
+using FloatArraySequence =
+    std::array<std::array<float, kFrameSize>, kNumFrames>;
+
+constexpr FloatArraySequence kBiQuadInputSeq = {
+    {{{-87.166290f, -8.029022f, 101.619583f, -0.294296f, -5.825764f, -8.890625f,
+       10.310432f, 54.845333f}},
+     {{-64.647644f, -6.883945f, 11.059189f, -95.242538f, -108.870834f,
+       11.024944f, 63.044102f, -52.709583f}},
+     {{-32.350529f, -18.108028f, -74.022339f, -8.986874f, -1.525581f,
+       103.705513f, 6.346226f, -14.319557f}},
+     {{22.645832f, -64.597153f, 55.462521f, -109.393188f, 10.117825f,
+       -40.019642f, -98.612228f, -8.330326f}}}};
+
+// Computed as `scipy.signal.butter(N=2, Wn=60/24000, btype='highpass')`.
+constexpr BiQuadFilter::Config kBiQuadConfig{
+    {0.99446179f, -1.98892358f, 0.99446179f},
+    {-1.98889291f, 0.98895425f}};
+
+// Comparing to scipy. The expected output is generated as follows:
+// zi = np.float32([0, 0])
+// for i in range(4):
+//   yn, zi = scipy.signal.lfilter(B, A, x[i], zi=zi)
+//   print(yn)
+constexpr FloatArraySequence kBiQuadOutputSeq = {
+    {{{-86.68354497f, -7.02175351f, 102.10290352f, -0.37487333f, -5.87205847f,
+       -8.85521608f, 10.33772563f, 54.51157181f}},
+     {{-64.92531604f, -6.76395978f, 11.15534507f, -94.68073341f, -107.18177856f,
+       13.24642474f, 64.84288941f, -50.97822629f}},
+     {{-30.1579652f, -15.64850899f, -71.06662821f, -5.5883229f, 1.91175353f,
+       106.5572003f, 8.57183046f, -12.06298473f}},
+     {{24.84286614f, -62.18094158f, 57.91488056f, -106.65685933f, 13.38760103f,
+       -36.60367134f, -94.44880104f, -3.59920354f}}}};
+
+// Fails for every pair from two equally sized rtc::ArrayView<float> views such
+// that their relative error is above a given threshold. If the expected value
+// of a pair is 0, `tolerance` is used to check the absolute error.
+void ExpectNearRelative(rtc::ArrayView<const float> expected,
+                        rtc::ArrayView<const float> computed,
+                        const float tolerance) {
+  // The relative error is undefined when the expected value is 0.
+  // When that happens, check the absolute error instead. `safe_den` is used
+  // below to implement such logic.
+  auto safe_den = [](float x) { return (x == 0.0f) ? 1.0f : std::fabs(x); };
+  ASSERT_EQ(expected.size(), computed.size());
+  for (size_t i = 0; i < expected.size(); ++i) {
+    const float abs_diff = std::fabs(expected[i] - computed[i]);
+    // No failure when the values are equal.
+    if (abs_diff == 0.0f) {
+      continue;
+    }
+    SCOPED_TRACE(i);
+    SCOPED_TRACE(expected[i]);
+    SCOPED_TRACE(computed[i]);
+    EXPECT_LE(abs_diff / safe_den(expected[i]), tolerance);
+  }
+}
+
+// Checks that filtering works when different containers are used both as input
+// and as output.
+TEST(BiQuadFilterTest, FilterNotInPlace) {
+  BiQuadFilter filter(kBiQuadConfig);
+  std::array<float, kFrameSize> samples;
+
+  // TODO(https://bugs.webrtc.org/8948): Add when the issue is fixed.
+  // FloatingPointExceptionObserver fpe_observer;
+
+  for (int i = 0; i < kNumFrames; ++i) {
+    SCOPED_TRACE(i);
+    filter.Process(kBiQuadInputSeq[i], samples);
+    ExpectNearRelative(kBiQuadOutputSeq[i], samples, 2e-4f);
+  }
+}
+
+// Checks that filtering works when the same container is used both as input
+// and as output.
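+// (Note added for clarity: in-place filtering is possible because
+// BiQuadFilter::Process() copies each input sample to a temporary before
+// writing the corresponding output sample; see biquad_filter.cc.)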
+TEST(BiQuadFilterTest, FilterInPlace) {
+  BiQuadFilter filter(kBiQuadConfig);
+  std::array<float, kFrameSize> samples;
+
+  // TODO(https://bugs.webrtc.org/8948): Add when the issue is fixed.
+  // FloatingPointExceptionObserver fpe_observer;
+
+  for (int i = 0; i < kNumFrames; ++i) {
+    SCOPED_TRACE(i);
+    std::copy(kBiQuadInputSeq[i].begin(), kBiQuadInputSeq[i].end(),
+              samples.begin());
+    filter.Process({samples}, {samples});
+    ExpectNearRelative(kBiQuadOutputSeq[i], samples, 2e-4f);
+  }
+}
+
+// Checks that different configurations produce different outputs.
+TEST(BiQuadFilterTest, SetConfigDifferentOutput) {
+  BiQuadFilter filter(/*config=*/{{0.97803048f, -1.95606096f, 0.97803048f},
+                                  {-1.95557824f, 0.95654368f}});
+
+  std::array<float, kFrameSize> samples1;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples1);
+  }
+
+  filter.SetConfig(
+      {{0.09763107f, 0.19526215f, 0.09763107f}, {-0.94280904f, 0.33333333f}});
+  std::array<float, kFrameSize> samples2;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples2);
+  }
+
+  EXPECT_NE(samples1, samples2);
+}
+
+// Checks that when `SetConfig()` is called but the filter coefficients are
+// the same, the filter state is reset.
+TEST(BiQuadFilterTest, SetConfigResetsState) {
+  BiQuadFilter filter(kBiQuadConfig);
+
+  std::array<float, kFrameSize> samples1;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples1);
+  }
+
+  filter.SetConfig(kBiQuadConfig);
+  std::array<float, kFrameSize> samples2;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples2);
+  }
+
+  EXPECT_EQ(samples1, samples2);
+}
+
+// Checks that when `Reset()` is called the filter state is reset.
+TEST(BiQuadFilterTest, Reset) {
+  BiQuadFilter filter(kBiQuadConfig);
+
+  std::array<float, kFrameSize> samples1;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples1);
+  }
+
+  filter.Reset();
+  std::array<float, kFrameSize> samples2;
+  for (int i = 0; i < kNumFrames; ++i) {
+    filter.Process(kBiQuadInputSeq[i], samples2);
+  }
+
+  EXPECT_EQ(samples1, samples2);
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc
new file mode 100644
index 0000000000..fd759c63e8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/clipping_predictor.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
+#include "modules/audio_processing/agc2/gain_map_internal.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kClippingPredictorMaxGainChange = 15;
+
+// Returns an input volume in the [`min_input_volume`, `max_input_volume`] range
+// that reduces `gain_error_db`, which is a gain error estimated when
+// `input_volume` was applied, according to a fixed gain map.
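+// Illustrative example (values assumed, not normative): with
+// `gain_error_db` = +3 and `input_volume` = 100, the loop below raises the
+// volume until `kGainMap[new_volume] - kGainMap[100]` reaches 3 dB or
+// `max_input_volume` is hit; a negative error walks the volume down instead.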
+int ComputeVolumeUpdate(int gain_error_db,
+                        int input_volume,
+                        int min_input_volume,
+                        int max_input_volume) {
+  RTC_DCHECK_GE(input_volume, 0);
+  RTC_DCHECK_LE(input_volume, max_input_volume);
+  if (gain_error_db == 0) {
+    return input_volume;
+  }
+  int new_volume = input_volume;
+  if (gain_error_db > 0) {
+    while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db &&
+           new_volume < max_input_volume) {
+      ++new_volume;
+    }
+  } else {
+    while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db &&
+           new_volume > min_input_volume) {
+      --new_volume;
+    }
+  }
+  return new_volume;
+}
+
+float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
+  const float crest_factor =
+      FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
+  return crest_factor;
+}
+
+// Crest factor-based clipping prediction and clipped level step estimation.
+class ClippingEventPredictor : public ClippingPredictor {
+ public:
+  // ClippingEventPredictor with `num_channels` channels (limited to values
+  // higher than zero); window size `window_length` and reference window size
+  // `reference_window_length` (both referring to the number of frames in the
+  // respective sliding windows and limited to values higher than zero);
+  // reference window delay `reference_window_delay` (delay in frames, limited
+  // to values zero and higher with an additional requirement of
+  // `window_length` < `reference_window_length` + `reference_window_delay`);
+  // and an estimation peak threshold `clipping_threshold` and a crest factor
+  // drop threshold `crest_factor_margin` (both in dB).
+  ClippingEventPredictor(int num_channels,
+                         int window_length,
+                         int reference_window_length,
+                         int reference_window_delay,
+                         float clipping_threshold,
+                         float crest_factor_margin)
+      : window_length_(window_length),
+        reference_window_length_(reference_window_length),
+        reference_window_delay_(reference_window_delay),
+        clipping_threshold_(clipping_threshold),
+        crest_factor_margin_(crest_factor_margin) {
+    RTC_DCHECK_GT(num_channels, 0);
+    RTC_DCHECK_GT(window_length, 0);
+    RTC_DCHECK_GT(reference_window_length, 0);
+    RTC_DCHECK_GE(reference_window_delay, 0);
+    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+                  window_length);
+    const int buffer_length = GetMinFramesProcessed();
+    RTC_DCHECK_GT(buffer_length, 0);
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_.push_back(
+          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+    }
+  }
+
+  ClippingEventPredictor(const ClippingEventPredictor&) = delete;
+  ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
+  ~ClippingEventPredictor() {}
+
+  void Reset() {
+    const int num_channels = ch_buffers_.size();
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_[i]->Reset();
+    }
+  }
+
+  // Analyzes a frame of audio and stores the framewise metrics in
+  // `ch_buffers_`.
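+  // (Explanatory note, not upstream documentation: for each 10 ms frame,
+  // e.g. 320 samples at 32 kHz, one {mean-square, peak} pair is pushed per
+  // channel, so the window sizes above count frames rather than samples.)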
+  void Analyze(const AudioFrameView<const float>& frame) {
+    const int num_channels = frame.num_channels();
+    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+    const int samples_per_channel = frame.samples_per_channel();
+    RTC_DCHECK_GT(samples_per_channel, 0);
+    for (int channel = 0; channel < num_channels; ++channel) {
+      float sum_squares = 0.0f;
+      float peak = 0.0f;
+      for (const auto& sample : frame.channel(channel)) {
+        sum_squares += sample * sample;
+        peak = std::max(std::fabs(sample), peak);
+      }
+      ch_buffers_[channel]->Push(
+          {sum_squares / static_cast<float>(samples_per_channel), peak});
+    }
+  }
+
+  // Estimates the analog gain adjustment for channel `channel` using a
+  // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
+  // estimate for the clipped level step equal to `default_step` if at least
+  // `GetMinFramesProcessed()` frames have been processed since the
+  // last reset and a clipping event is predicted. `level`, `min_mic_level`, and
+  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
+  absl::optional<int> EstimateClippedLevelStep(int channel,
+                                               int level,
+                                               int default_step,
+                                               int min_mic_level,
+                                               int max_mic_level) const {
+    RTC_CHECK_GE(channel, 0);
+    RTC_CHECK_LT(channel, ch_buffers_.size());
+    RTC_DCHECK_GE(level, 0);
+    RTC_DCHECK_LE(level, 255);
+    RTC_DCHECK_GT(default_step, 0);
+    RTC_DCHECK_LE(default_step, 255);
+    RTC_DCHECK_GE(min_mic_level, 0);
+    RTC_DCHECK_LE(min_mic_level, 255);
+    RTC_DCHECK_GE(max_mic_level, 0);
+    RTC_DCHECK_LE(max_mic_level, 255);
+    if (level <= min_mic_level) {
+      return absl::nullopt;
+    }
+    if (PredictClippingEvent(channel)) {
+      const int new_level =
+          rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
+      const int step = level - new_level;
+      if (step > 0) {
+        return step;
+      }
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  int GetMinFramesProcessed() const {
+    return reference_window_delay_ + reference_window_length_;
+  }
+
+  // Predicts clipping events based on the processed audio frames. Returns
+  // true if a clipping event is likely.
+  bool PredictClippingEvent(int channel) const {
+    const auto metrics =
+        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+    if (!metrics.has_value() ||
+        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+      return false;
+    }
+    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+        reference_window_delay_, reference_window_length_);
+    if (!reference_metrics.has_value()) {
+      return false;
+    }
+    const float crest_factor = ComputeCrestFactor(metrics.value());
+    const float reference_crest_factor =
+        ComputeCrestFactor(reference_metrics.value());
+    if (crest_factor < reference_crest_factor - crest_factor_margin_) {
+      return true;
+    }
+    return false;
+  }
+
+  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+  const int window_length_;
+  const int reference_window_length_;
+  const int reference_window_delay_;
+  const float clipping_threshold_;
+  const float crest_factor_margin_;
+};
+
+// Performs crest factor-based clipping peak prediction.
+class ClippingPeakPredictor : public ClippingPredictor {
+ public:
+  // Ctor.
ClippingPeakPredictor with `num_channels` channels (limited to values
+  // higher than zero); window size `window_length` and reference window size
+  // `reference_window_length` (both referring to the number of frames in the
+  // respective sliding windows and limited to values higher than zero);
+  // reference window delay `reference_window_delay` (delay in frames, limited
+  // to values zero and higher with an additional requirement of
+  // `window_length` < `reference_window_length` + `reference_window_delay`);
+  // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
+  // clipped level step estimation is used if `adaptive_step_estimation` is
+  // true.
+  explicit ClippingPeakPredictor(int num_channels,
+                                 int window_length,
+                                 int reference_window_length,
+                                 int reference_window_delay,
+                                 int clipping_threshold,
+                                 bool adaptive_step_estimation)
+      : window_length_(window_length),
+        reference_window_length_(reference_window_length),
+        reference_window_delay_(reference_window_delay),
+        clipping_threshold_(clipping_threshold),
+        adaptive_step_estimation_(adaptive_step_estimation) {
+    RTC_DCHECK_GT(num_channels, 0);
+    RTC_DCHECK_GT(window_length, 0);
+    RTC_DCHECK_GT(reference_window_length, 0);
+    RTC_DCHECK_GE(reference_window_delay, 0);
+    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+                  window_length);
+    const int buffer_length = GetMinFramesProcessed();
+    RTC_DCHECK_GT(buffer_length, 0);
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_.push_back(
+          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+    }
+  }
+
+  ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
+  ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
+  ~ClippingPeakPredictor() {}
+
+  void Reset() {
+    const int num_channels = ch_buffers_.size();
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_[i]->Reset();
+    }
+  }
+
+  // Analyzes a frame of audio and stores the framewise metrics in
+  // `ch_buffers_`.
+  void Analyze(const AudioFrameView<const float>& frame) {
+    const int num_channels = frame.num_channels();
+    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+    const int samples_per_channel = frame.samples_per_channel();
+    RTC_DCHECK_GT(samples_per_channel, 0);
+    for (int channel = 0; channel < num_channels; ++channel) {
+      float sum_squares = 0.0f;
+      float peak = 0.0f;
+      for (const auto& sample : frame.channel(channel)) {
+        sum_squares += sample * sample;
+        peak = std::max(std::fabs(sample), peak);
+      }
+      ch_buffers_[channel]->Push(
+          {sum_squares / static_cast<float>(samples_per_channel), peak});
+    }
+  }
+
+  // Estimates the analog gain adjustment for channel `channel` using a
+  // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
+  // estimate for the clipped level step (equal to `default_step` if
+  // `adaptive_step_estimation_` is false) if at
+  // least `GetMinFramesProcessed()` frames have been processed since the last
+  // reset and a clipping event is predicted. `level`, `min_mic_level`, and
+  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
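+  // (Illustrative example, values assumed: with a projected peak 4.2 dB
+  // above full scale, the adaptive path clamps -ceil(4.2) = -5 dB into
+  // [-kClippingPredictorMaxGainChange, 0], converts it to a volume decrease
+  // via ComputeVolumeUpdate(), and returns at least `default_step`.)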
+  absl::optional<int> EstimateClippedLevelStep(int channel,
+                                               int level,
+                                               int default_step,
+                                               int min_mic_level,
+                                               int max_mic_level) const {
+    RTC_DCHECK_GE(channel, 0);
+    RTC_DCHECK_LT(channel, ch_buffers_.size());
+    RTC_DCHECK_GE(level, 0);
+    RTC_DCHECK_LE(level, 255);
+    RTC_DCHECK_GT(default_step, 0);
+    RTC_DCHECK_LE(default_step, 255);
+    RTC_DCHECK_GE(min_mic_level, 0);
+    RTC_DCHECK_LE(min_mic_level, 255);
+    RTC_DCHECK_GE(max_mic_level, 0);
+    RTC_DCHECK_LE(max_mic_level, 255);
+    if (level <= min_mic_level) {
+      return absl::nullopt;
+    }
+    absl::optional<float> estimate_db = EstimatePeakValue(channel);
+    if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
+      int step = 0;
+      if (!adaptive_step_estimation_) {
+        step = default_step;
+      } else {
+        const int estimated_gain_change =
+            rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
+                           -kClippingPredictorMaxGainChange, 0);
+        step =
+            std::max(level - ComputeVolumeUpdate(estimated_gain_change, level,
+                                                 min_mic_level, max_mic_level),
+                     default_step);
+      }
+      const int new_level =
+          rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
+      if (level > new_level) {
+        return level - new_level;
+      }
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  int GetMinFramesProcessed() {
+    return reference_window_delay_ + reference_window_length_;
+  }
+
+  // Predicts clipping sample peaks based on the processed audio frames.
+  // Returns the estimated peak value if clipping is predicted. Otherwise
+  // returns absl::nullopt.
+  absl::optional<float> EstimatePeakValue(int channel) const {
+    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+        reference_window_delay_, reference_window_length_);
+    if (!reference_metrics.has_value()) {
+      return absl::nullopt;
+    }
+    const auto metrics =
+        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+    if (!metrics.has_value() ||
+        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+      return absl::nullopt;
+    }
+    const float reference_crest_factor =
+        ComputeCrestFactor(reference_metrics.value());
+    const float& mean_squares = metrics.value().average;
+    const float projected_peak =
+        reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
+    return projected_peak;
+  }
+
+  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+  const int window_length_;
+  const int reference_window_length_;
+  const int reference_window_delay_;
+  const int clipping_threshold_;
+  const bool adaptive_step_estimation_;
+};
+
+} // namespace
+
+std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
+    int num_channels,
+    const AudioProcessing::Config::GainController1::AnalogGainController::
+        ClippingPredictor& config) {
+  if (!config.enabled) {
+    RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled.";
+    return nullptr;
+  }
+  RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled.";
+  using ClippingPredictorMode = AudioProcessing::Config::GainController1::
+      AnalogGainController::ClippingPredictor::Mode;
+  switch (config.mode) {
+    case ClippingPredictorMode::kClippingEventPrediction:
+      return std::make_unique<ClippingEventPredictor>(
+          num_channels, config.window_length, config.reference_window_length,
+          config.reference_window_delay, config.clipping_threshold,
+          config.crest_factor_margin);
+    case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
+      return std::make_unique<ClippingPeakPredictor>(
+          num_channels, config.window_length, config.reference_window_length,
+          config.reference_window_delay, config.clipping_threshold,
+          /*adaptive_step_estimation=*/true);
+    case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
+      return std::make_unique<ClippingPeakPredictor>(
num_channels, config.window_length, config.reference_window_length,
+          config.reference_window_delay, config.clipping_threshold,
+          /*adaptive_step_estimation=*/false);
+  }
+  RTC_DCHECK_NOTREACHED();
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.h b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.h
new file mode 100644
index 0000000000..14612508c0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+// Frame-wise clipping prediction and clipped level step estimation. Analyzes
+// 10 ms multi-channel frames and estimates an analog mic level decrease step
+// to possibly avoid clipping when predicted. `Analyze()` and
+// `EstimateClippedLevelStep()` can be called in any order.
+class ClippingPredictor {
+ public:
+  virtual ~ClippingPredictor() = default;
+
+  virtual void Reset() = 0;
+
+  // Analyzes a 10 ms multi-channel audio frame.
+  virtual void Analyze(const AudioFrameView<const float>& frame) = 0;
+
+  // Predicts if clipping is going to occur for the specified `channel` in the
+  // near-future and, if so, it returns a recommended analog mic level decrease
+  // step. Returns absl::nullopt if clipping is not predicted.
+  // `level` is the current analog mic level, `default_step` is the amount the
+  // mic level is lowered by the analog controller with every clipping event
+  // and `min_mic_level` and `max_mic_level` are the range of allowed analog
+  // mic levels.
+  virtual absl::optional<int> EstimateClippedLevelStep(
+      int channel,
+      int level,
+      int default_step,
+      int min_mic_level,
+      int max_mic_level) const = 0;
+};
+
+// Creates a ClippingPredictor based on the provided `config`. When enabled,
+// the following must hold for `config`:
+// `window_length < reference_window_length + reference_window_delay`.
+// Returns `nullptr` if `config.enabled` is false.
+std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
+    int num_channels,
+    const AudioProcessing::Config::GainController1::AnalogGainController::
+        ClippingPredictor& config);
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_gn/moz.build
new file mode 100644
index 0000000000..9cddd69abe
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + 
DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("clipping_predictor_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc new file mode 100644 index 0000000000..fe4cf2a154 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+bool ClippingPredictorLevelBuffer::Level::operator==(const Level& level) const {
+  constexpr float kEpsilon = 1e-6f;
+  return std::fabs(average - level.average) < kEpsilon &&
+         std::fabs(max - level.max) < kEpsilon;
+}
+
+ClippingPredictorLevelBuffer::ClippingPredictorLevelBuffer(int capacity)
+    : tail_(-1), size_(0), data_(std::max(1, capacity)) {
+  if (capacity > kMaxCapacity) {
+    RTC_LOG(LS_WARNING) << "[agc]: ClippingPredictorLevelBuffer exceeds the "
+                        << "maximum allowed capacity. Capacity: " << capacity;
+  }
+  RTC_DCHECK(!data_.empty());
+}
+
+void ClippingPredictorLevelBuffer::Reset() {
+  tail_ = -1;
+  size_ = 0;
+}
+
+void ClippingPredictorLevelBuffer::Push(Level level) {
+  ++tail_;
+  if (tail_ == Capacity()) {
+    tail_ = 0;
+  }
+  if (size_ < Capacity()) {
+    size_++;
+  }
+  data_[tail_] = level;
+}
+
+// TODO(bugs.webrtc.org/12774): Optimize partial computation for long buffers.
+absl::optional<ClippingPredictorLevelBuffer::Level>
+ClippingPredictorLevelBuffer::ComputePartialMetrics(int delay,
+                                                    int num_items) const {
+  RTC_DCHECK_GE(delay, 0);
+  RTC_DCHECK_LT(delay, Capacity());
+  RTC_DCHECK_GT(num_items, 0);
+  RTC_DCHECK_LE(num_items, Capacity());
+  RTC_DCHECK_LE(delay + num_items, Capacity());
+  if (delay + num_items > Size()) {
+    return absl::nullopt;
+  }
+  float sum = 0.0f;
+  float max = 0.0f;
+  for (int i = 0; i < num_items && i < Size(); ++i) {
+    int idx = tail_ - delay - i;
+    if (idx < 0) {
+      idx += Capacity();
+    }
+    sum += data_[idx].average;
+    max = std::fmax(data_[idx].max, max);
+  }
+  return absl::optional<Level>({sum / static_cast<float>(num_items), max});
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.h
new file mode 100644
index 0000000000..c9032773a6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+
+namespace webrtc {
+
+// A circular buffer to store frame-wise `Level` items for clipping prediction.
+// The current implementation is not optimized for large buffer lengths.
+class ClippingPredictorLevelBuffer {
+ public:
+  struct Level {
+    float average;
+    float max;
+    bool operator==(const Level& level) const;
+  };
+
+  // Recommended maximum capacity. It is possible to create a buffer with a
+  // larger capacity, but the implementation is not optimized for large values.
+  static constexpr int kMaxCapacity = 100;
+
+  // Ctor. Sets the buffer capacity to max(1, `capacity`) and logs a warning
+  // message if the capacity is greater than `kMaxCapacity`.
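+  //
+  // Illustrative usage sketch (not upstream documentation):
+  //   ClippingPredictorLevelBuffer buffer(/*capacity=*/3);
+  //   buffer.Push({/*average=*/0.25f, /*max=*/0.9f});
+  //   auto metrics =
+  //       buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1);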
+  explicit ClippingPredictorLevelBuffer(int capacity);
+  ~ClippingPredictorLevelBuffer() {}
+  ClippingPredictorLevelBuffer(const ClippingPredictorLevelBuffer&) = delete;
+  ClippingPredictorLevelBuffer& operator=(const ClippingPredictorLevelBuffer&) =
+      delete;
+
+  void Reset();
+
+  // Returns the current number of items stored in the buffer.
+  int Size() const { return size_; }
+
+  // Returns the capacity of the buffer.
+  int Capacity() const { return data_.size(); }
+
+  // Adds a `level` item into the circular buffer `data_`. Stores at most
+  // `Capacity()` items. If more items are pushed, the new item replaces the
+  // least recently pushed item.
+  void Push(Level level);
+
+  // If at least `num_items` + `delay` items have been pushed, returns the
+  // average and maximum value for the `num_items` most recently pushed items
+  // with delays from `delay` to `delay` + `num_items` - 1 (a delay equal to
+  // zero corresponds to the most recently pushed item). The value of `delay`
+  // is limited to [0, N] and `num_items` to [1, M] where N + M is the capacity
+  // of the buffer.
+  absl::optional<Level> ComputePartialMetrics(int delay, int num_items) const;
+
+ private:
+  int tail_;
+  int size_;
+  std::vector<Level> data_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer_unittest.cc
new file mode 100644
index 0000000000..7af9a436c9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_level_buffer_unittest.cc
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
+
+#include <algorithm>
+
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Optional;
+
+class ClippingPredictorLevelBufferParametrization
+    : public ::testing::TestWithParam<int> {
+ protected:
+  int capacity() const { return GetParam(); }
+};
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckEmptyBufferSize) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), 0);
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckHalfEmptyBufferSize) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  for (int i = 0; i < buffer.Capacity() / 2; ++i) {
+    buffer.Push({2, 4});
+  }
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), std::max(capacity(), 1) / 2);
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckFullBufferSize) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  for (int i = 0; i < buffer.Capacity(); ++i) {
+    buffer.Push({2, 4});
+  }
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), std::max(capacity(), 1));
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckLargeBufferSize) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  for (int i = 0; i < 2 * buffer.Capacity(); ++i) {
+    buffer.Push({2, 4});
+  }
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), std::max(capacity(), 1));
+}
+
+TEST_P(ClippingPredictorLevelBufferParametrization, CheckSizeAfterReset) {
+  ClippingPredictorLevelBuffer buffer(capacity());
+  buffer.Push({1, 1});
+  buffer.Push({1, 1});
+  buffer.Reset();
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), 0);
+  buffer.Push({1, 1});
+  EXPECT_EQ(buffer.Capacity(), std::max(capacity(), 1));
+  EXPECT_EQ(buffer.Size(), 1);
+}
+
+INSTANTIATE_TEST_SUITE_P(ClippingPredictorLevelBufferTest,
+                         ClippingPredictorLevelBufferParametrization,
+                         ::testing::Values(-1, 0, 1, 123));
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterFullBuffer) {
+  ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
+  buffer.Push({1, 2});
+  buffer.Push({3, 6});
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{3, 6})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{1, 2})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{2, 6})));
+}
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterPushBeyondCapacity) {
+  ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
+  buffer.Push({1, 1});
+  buffer.Push({3, 6});
+  buffer.Push({5, 10});
+  buffer.Push({7, 14});
+  buffer.Push({6, 12});
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{6, 12})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{7, 14})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{6.5f, 14})));
+}
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterTooFewItems) {
+  ClippingPredictorLevelBuffer buffer(/*capacity=*/4);
+  buffer.Push({1, 2});
+  buffer.Push({3, 6});
EXPECT_EQ(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/3),
+            absl::nullopt);
+  EXPECT_EQ(buffer.ComputePartialMetrics(/*delay=*/2, /*num_items=*/1),
+            absl::nullopt);
+}
+
+TEST(ClippingPredictorLevelBufferTest, CheckMetricsAfterReset) {
+  ClippingPredictorLevelBuffer buffer(/*capacity=*/2);
+  buffer.Push({1, 2});
+  buffer.Reset();
+  buffer.Push({5, 10});
+  buffer.Push({7, 14});
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{7, 14})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{6, 14})));
+  EXPECT_THAT(buffer.ComputePartialMetrics(/*delay=*/1, /*num_items=*/1),
+              Optional(Eq(ClippingPredictorLevelBuffer::Level{5, 10})));
+}
+
+} // namespace
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_unittest.cc
new file mode 100644
index 0000000000..af73107749
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor_unittest.cc
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/clipping_predictor.h"
+
+#include <limits>
+#include <tuple>
+#include <vector>
+
+#include "rtc_base/checks.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Optional;
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor;
+using ClippingPredictorMode = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor::Mode;
+
+constexpr int kSampleRateHz = 32000;
+constexpr int kNumChannels = 1;
+constexpr int kSamplesPerChannel = kSampleRateHz / 100;
+constexpr int kMaxMicLevel = 255;
+constexpr int kMinMicLevel = 12;
+constexpr int kDefaultClippedLevelStep = 15;
+constexpr float kMaxSampleS16 =
+    static_cast<float>(std::numeric_limits<int16_t>::max());
+
+// Threshold in dB corresponding to a signal with an amplitude equal to 99% of
+// the dynamic range - i.e., computed as `20*log10(0.99)`.
+constexpr float kClippingThresholdDb = -0.08729610804900176f;
+
+void CallAnalyze(int num_calls,
+                 const AudioFrameView<const float>& frame,
+                 ClippingPredictor& predictor) {
+  for (int i = 0; i < num_calls; ++i) {
+    predictor.Analyze(frame);
+  }
+}
+
+// Creates and analyzes an audio frame with a non-zero (approx. 4.15 dB) crest
+// factor.
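+// (Derivation added for clarity: the repeating ramp 0.1, 0.2, ..., 1.0 used
+// below has peak 1.0 and RMS sqrt((0.01 + 0.04 + ... + 1.0) / 10) =
+// sqrt(0.385) ~= 0.62, so the crest factor is 20 * log10(1.0 / 0.62) ~= 4.15
+// dB, independently of `peak_ratio`.)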
+void AnalyzeNonZeroCrestFactorAudio(int num_calls,
+                                    int num_channels,
+                                    float peak_ratio,
+                                    ClippingPredictor& predictor) {
+  RTC_DCHECK_GT(num_calls, 0);
+  RTC_DCHECK_GT(num_channels, 0);
+  RTC_DCHECK_LE(peak_ratio, 1.0f);
+  std::vector<float*> audio(num_channels);
+  std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.0f);
+  for (int channel = 0; channel < num_channels; ++channel) {
+    audio[channel] = &audio_data[channel * kSamplesPerChannel];
+    for (int sample = 0; sample < kSamplesPerChannel; sample += 10) {
+      audio[channel][sample] = 0.1f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 1] = 0.2f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 2] = 0.3f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 3] = 0.4f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 4] = 0.5f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 5] = 0.6f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 6] = 0.7f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 7] = 0.8f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 8] = 0.9f * peak_ratio * kMaxSampleS16;
+      audio[channel][sample + 9] = 1.0f * peak_ratio * kMaxSampleS16;
+    }
+  }
+  AudioFrameView<const float> frame(audio.data(), num_channels,
+                                    kSamplesPerChannel);
+  CallAnalyze(num_calls, frame, predictor);
+}
+
+void CheckChannelEstimatesWithValue(int num_channels,
+                                    int level,
+                                    int default_step,
+                                    int min_mic_level,
+                                    int max_mic_level,
+                                    const ClippingPredictor& predictor,
+                                    int expected) {
+  for (int i = 0; i < num_channels; ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_THAT(predictor.EstimateClippedLevelStep(
+                    i, level, default_step, min_mic_level, max_mic_level),
+                Optional(Eq(expected)));
+  }
+}
+
+void CheckChannelEstimatesWithoutValue(int num_channels,
+                                       int level,
+                                       int default_step,
+                                       int min_mic_level,
+                                       int max_mic_level,
+                                       const ClippingPredictor& predictor) {
+  for (int i = 0; i < num_channels; ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_EQ(predictor.EstimateClippedLevelStep(i, level, default_step,
+                                                 min_mic_level, max_mic_level),
+              absl::nullopt);
+  }
+}
+
+// Creates and analyzes an audio frame with a zero crest factor.
+void AnalyzeZeroCrestFactorAudio(int num_calls,
+                                 int num_channels,
+                                 float peak_ratio,
+                                 ClippingPredictor& predictor) {
+  RTC_DCHECK_GT(num_calls, 0);
+  RTC_DCHECK_GT(num_channels, 0);
+  RTC_DCHECK_LE(peak_ratio, 1.f);
+  std::vector<float*> audio(num_channels);
+  std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.f);
+  for (int channel = 0; channel < num_channels; ++channel) {
+    audio[channel] = &audio_data[channel * kSamplesPerChannel];
+    for (int sample = 0; sample < kSamplesPerChannel; ++sample) {
+      audio[channel][sample] = peak_ratio * kMaxSampleS16;
+    }
+  }
+  auto frame = AudioFrameView<const float>(audio.data(), num_channels,
+                                           kSamplesPerChannel);
+  CallAnalyze(num_calls, frame, predictor);
+}
+
+TEST(ClippingPeakPredictorTest, NoPredictorCreated) {
+  auto predictor =
+      CreateClippingPredictor(kNumChannels, /*config=*/{/*enabled=*/false});
+  EXPECT_FALSE(predictor);
+}
+
+TEST(ClippingPeakPredictorTest, ClippingEventPredictionCreated) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  auto predictor = CreateClippingPredictor(
+      kNumChannels,
+      /*config=*/{/*enabled=*/true,
+                  /*mode=*/ClippingPredictorMode::kClippingEventPrediction});
+  EXPECT_TRUE(predictor);
+}
+
+TEST(ClippingPeakPredictorTest, AdaptiveStepClippingPeakPredictionCreated) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  auto predictor = CreateClippingPredictor(
+      kNumChannels, /*config=*/{
+          /*enabled=*/true,
+          /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction});
+  EXPECT_TRUE(predictor);
+}
+
+TEST(ClippingPeakPredictorTest, FixedStepClippingPeakPredictionCreated) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  auto predictor = CreateClippingPredictor(
+      kNumChannels, /*config=*/{
+          /*enabled=*/true,
+          /*mode=*/ClippingPredictorMode::kFixedStepClippingPeakPrediction});
+  EXPECT_TRUE(predictor);
+}
+
+class ClippingPredictorParameterization
+    : public ::testing::TestWithParam<std::tuple<int, int, int, int>> {
+ protected:
+  int num_channels() const { return std::get<0>(GetParam()); }
+  ClippingPredictorConfig GetConfig(ClippingPredictorMode mode) const {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+    return {/*enabled=*/true,
+            /*mode=*/mode,
+            /*window_length=*/std::get<1>(GetParam()),
+            /*reference_window_length=*/std::get<2>(GetParam()),
+            /*reference_window_delay=*/std::get<3>(GetParam()),
+            /*clipping_threshold=*/-1.0f,
+            /*crest_factor_margin=*/0.5f};
+  }
+};
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingEventPredictorEstimateAfterCrestFactorDrop) {
+  const ClippingPredictorConfig config =
+      GetConfig(ClippingPredictorMode::kClippingEventPrediction);
+  if (config.reference_window_length + config.reference_window_delay <=
+      config.window_length) {
+    return;
+  }
+  auto predictor = CreateClippingPredictor(num_channels(), config);
+  AnalyzeNonZeroCrestFactorAudio(
+      /*num_calls=*/config.reference_window_length +
+          config.reference_window_delay - config.window_length,
+      num_channels(), /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(config.window_length, num_channels(),
+                              /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithValue(
+      num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+}
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingEventPredictorNoEstimateAfterConstantCrestFactor) {
+  const ClippingPredictorConfig config =
+      GetConfig(ClippingPredictorMode::kClippingEventPrediction);
+  if (config.reference_window_length + config.reference_window_delay <=
+      config.window_length) {
+    return;
+  }
+  auto predictor = CreateClippingPredictor(num_channels(), config);
+  AnalyzeNonZeroCrestFactorAudio(
+      /*num_calls=*/config.reference_window_length +
+          config.reference_window_delay - config.window_length,
+      num_channels(), /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length,
+                                 num_channels(),
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingPeakPredictorEstimateAfterHighCrestFactor) {
+  const ClippingPredictorConfig config =
+      GetConfig(ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction);
+  if (config.reference_window_length + config.reference_window_delay <=
+      config.window_length) {
+    return;
+  }
+  auto predictor = CreateClippingPredictor(num_channels(), config);
+  AnalyzeNonZeroCrestFactorAudio(
+      /*num_calls=*/config.reference_window_length +
+          config.reference_window_delay - config.window_length,
+      num_channels(), /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length,
+                                 num_channels(),
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithValue(
+      num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+}
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingPeakPredictorNoEstimateAfterLowCrestFactor) {
+  const ClippingPredictorConfig config =
+      GetConfig(ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction);
+  if (config.reference_window_length + config.reference_window_delay <=
+      config.window_length) {
+    return;
+  }
+  auto predictor = CreateClippingPredictor(num_channels(), config);
+  AnalyzeZeroCrestFactorAudio(
+      /*num_calls=*/config.reference_window_length +
+          config.reference_window_delay - config.window_length,
+      num_channels(), /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.window_length,
+                                 num_channels(),
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
+
+INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor,
+                         ClippingPredictorParameterization,
+                         ::testing::Combine(::testing::Values(1, 5),
+                                            ::testing::Values(1, 5, 10),
+                                            ::testing::Values(1, 5),
+                                            ::testing::Values(0, 1, 5)));
+
+class ClippingEventPredictorParameterization
+    : public ::testing::TestWithParam<std::tuple<float, float>> {
+ protected:
+  ClippingPredictorConfig GetConfig() const {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+    return {/*enabled=*/true,
+            /*mode=*/ClippingPredictorMode::kClippingEventPrediction,
+            /*window_length=*/5,
+            /*reference_window_length=*/5,
+            /*reference_window_delay=*/5,
+            /*clipping_threshold=*/std::get<0>(GetParam()),
+            /*crest_factor_margin=*/std::get<1>(GetParam())};
+  }
+};
+
+TEST_P(ClippingEventPredictorParameterization,
+       CheckEstimateAfterCrestFactorDrop) {
+  const ClippingPredictorConfig config = GetConfig();
+  auto predictor = CreateClippingPredictor(kNumChannels, config);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length,
+                                 kNumChannels, /*peak_ratio=*/0.99f,
+                                 *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  // TODO(bugs.webrtc.org/12774): Add clarifying comment.
+  // TODO(bugs.webrtc.org/12774): Remove 4.15f threshold and split tests.
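+  // An estimate is expected only if the configured clipping threshold lies
+  // below the observed level (about -0.087 dBFS for a 0.99 peak ratio) and the
+  // crest factor margin lies below the observed crest factor drop (~4.15 dB).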
+  if (config.clipping_threshold < kClippingThresholdDb &&
+      config.crest_factor_margin < 4.15f) {
+    CheckChannelEstimatesWithValue(
+        kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+        kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+  } else {
+    CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                      kDefaultClippedLevelStep, kMinMicLevel,
+                                      kMaxMicLevel, *predictor);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor,
+                         ClippingEventPredictorParameterization,
+                         ::testing::Combine(::testing::Values(-1.0f, 0.0f),
+                                            ::testing::Values(3.0f, 4.16f)));
+
+class ClippingPredictorModeParameterization
+    : public ::testing::TestWithParam<ClippingPredictorMode> {
+ protected:
+  ClippingPredictorConfig GetConfig(float clipping_threshold_dbfs) const {
+    // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+    return {/*enabled=*/true,
+            /*mode=*/GetParam(),
+            /*window_length=*/5,
+            /*reference_window_length=*/5,
+            /*reference_window_delay=*/5,
+            /*clipping_threshold=*/clipping_threshold_dbfs,
+            /*crest_factor_margin=*/3.0f};
+  }
+};
+
+TEST_P(ClippingPredictorModeParameterization,
+       CheckEstimateAfterHighCrestFactorWithNoClippingMargin) {
+  const ClippingPredictorConfig config = GetConfig(
+      /*clipping_threshold_dbfs=*/0.0f);
+  auto predictor = CreateClippingPredictor(kNumChannels, config);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length,
+                                 kNumChannels, /*peak_ratio=*/0.99f,
+                                 *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  // Since the clipping threshold is set to 0 dBFS,
+  // `EstimateClippedLevelStep()` is expected to return an unavailable value.
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
+
+TEST_P(ClippingPredictorModeParameterization,
+       CheckEstimateAfterHighCrestFactorWithClippingMargin) {
+  const ClippingPredictorConfig config =
+      GetConfig(/*clipping_threshold_dbfs=*/-1.0f);
+  auto predictor = CreateClippingPredictor(kNumChannels, config);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/config.reference_window_length,
+                                 kNumChannels,
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(config.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  // TODO(bugs.webrtc.org/12774): Add clarifying comment.
+  const float expected_step =
+      config.mode == ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction
+          ? 17
+          : kDefaultClippedLevelStep;
+  CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255,
+                                 kDefaultClippedLevelStep, kMinMicLevel,
+                                 kMaxMicLevel, *predictor, expected_step);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    GainController1ClippingPredictor,
+    ClippingPredictorModeParameterization,
+    ::testing::Values(
+        ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction,
+        ClippingPredictorMode::kFixedStepClippingPeakPrediction));
+
+TEST(ClippingEventPredictorTest, CheckEstimateAfterReset) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
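+  // Reset() clears the observation windows, so the crest factor drop that
+  // would otherwise trigger an estimate can no longer be measured.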
+  constexpr ClippingPredictorConfig kConfig{
+      /*enabled=*/true,
+      /*mode=*/ClippingPredictorMode::kClippingEventPrediction,
+      /*window_length=*/5,
+      /*reference_window_length=*/5,
+      /*reference_window_delay=*/5,
+      /*clipping_threshold=*/-1.0f,
+      /*crest_factor_margin=*/3.0f};
+  auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+                                 kNumChannels,
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  predictor->Reset();
+  AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
+
+TEST(ClippingPeakPredictorTest, CheckNoEstimateAfterReset) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  constexpr ClippingPredictorConfig kConfig{
+      /*enabled=*/true,
+      /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction,
+      /*window_length=*/5,
+      /*reference_window_length=*/5,
+      /*reference_window_delay=*/5,
+      /*clipping_threshold=*/-1.0f};
+  auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+                                 kNumChannels,
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  predictor->Reset();
+  AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+}
+
+TEST(ClippingPeakPredictorTest, CheckAdaptiveStepEstimate) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
+  constexpr ClippingPredictorConfig kConfig{
+      /*enabled=*/true,
+      /*mode=*/ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction,
+      /*window_length=*/5,
+      /*reference_window_length=*/5,
+      /*reference_window_delay=*/5,
+      /*clipping_threshold=*/-1.0f};
+  auto predictor = CreateClippingPredictor(kNumChannels, kConfig);
+  AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length,
+                                 kNumChannels, /*peak_ratio=*/0.99f,
+                                 *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+  AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255,
+                                 kDefaultClippedLevelStep, kMinMicLevel,
+                                 kMaxMicLevel, *predictor, /*expected=*/17);
+}
+
+TEST(ClippingPeakPredictorTest, CheckFixedStepEstimate) {
+  // TODO(bugs.webrtc.org/12874): Use designated initializers once fixed.
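+  // Unlike the adaptive-step test above (which expects a step of 17 derived
+  // from the estimated peak level), fixed-step mode always reports the
+  // default clipped-level step.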
+ constexpr ClippingPredictorConfig kConfig{ + /*enabled=*/true, + /*mode=*/ClippingPredictorMode::kFixedStepClippingPeakPrediction, + /*window_length=*/5, + /*reference_window_length=*/5, + /*reference_window_delay=*/5, + /*clipping_threshold=*/-1.0f}; + auto predictor = CreateClippingPredictor(kNumChannels, kConfig); + AnalyzeNonZeroCrestFactorAudio(/*num_calls=*/kConfig.reference_window_length, + kNumChannels, /*peak_ratio=*/0.99f, + *predictor); + CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255, + kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor); + AnalyzeZeroCrestFactorAudio(kConfig.window_length, kNumChannels, + /*peak_ratio=*/0.99f, *predictor); + CheckChannelEstimatesWithValue( + kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel, + kMaxMicLevel, *predictor, kDefaultClippedLevelStep); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build new file mode 100644 index 0000000000..274cae23a3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True 
+ DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("common_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc new file mode 100644 index 0000000000..221b499e32 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc
@@ -0,0 +1,229 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/compute_interpolated_gain_curve.h"
+
+#include <algorithm>
+#include <cmath>
+#include <queue>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+#include "modules/audio_processing/agc2/limiter_db_gain_curve.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+std::pair<double, double> ComputeLinearApproximationParams(
+    const LimiterDbGainCurve* limiter,
+    const double x) {
+  const double m = limiter->GetGainFirstDerivativeLinear(x);
+  const double q = limiter->GetGainLinear(x) - m * x;
+  return {m, q};
+}
+
+double ComputeAreaUnderPiecewiseLinearApproximation(
+    const LimiterDbGainCurve* limiter,
+    const double x0,
+    const double x1) {
+  RTC_CHECK_LT(x0, x1);
+
+  // Linear approximation in x0 and x1.
+  double m0, q0, m1, q1;
+  std::tie(m0, q0) = ComputeLinearApproximationParams(limiter, x0);
+  std::tie(m1, q1) = ComputeLinearApproximationParams(limiter, x1);
+
+  // Intersection point between two adjacent linear pieces.
+  RTC_CHECK_NE(m1, m0);
+  const double x_split = (q0 - q1) / (m1 - m0);
+  RTC_CHECK_LT(x0, x_split);
+  RTC_CHECK_LT(x_split, x1);
+
+  auto area_under_linear_piece = [](double x_l, double x_r, double m,
+                                    double q) {
+    return x_r * (m * x_r / 2.0 + q) - x_l * (m * x_l / 2.0 + q);
+  };
+  return area_under_linear_piece(x0, x_split, m0, q0) +
+         area_under_linear_piece(x_split, x1, m1, q1);
+}
+
+// Computes the approximation error in the limiter region for a given interval.
+// The error is computed as the difference between the areas beneath the
+// limiter curve to approximate and its linear under-approximation.
+double LimiterUnderApproximationNegativeError(const LimiterDbGainCurve* limiter,
+                                              const double x0,
+                                              const double x1) {
+  const double area_limiter = limiter->GetGainIntegralLinear(x0, x1);
+  const double area_interpolated_curve =
+      ComputeAreaUnderPiecewiseLinearApproximation(limiter, x0, x1);
+  RTC_CHECK_GE(area_limiter, area_interpolated_curve);
+  return area_limiter - area_interpolated_curve;
+}
+
+// Automatically finds where to sample the beyond-knee region of a limiter
+// using a greedy optimization algorithm that iteratively decreases the
+// approximation error.
+// The solution is sub-optimal because the algorithm is greedy and the points
+// are assigned by halving intervals (starting with the whole beyond-knee
+// region as a single interval). However, even if sub-optimal, this algorithm
+// works well in practice and it is efficiently implemented using priority
+// queues.
+std::vector<double> SampleLimiterRegion(const LimiterDbGainCurve* limiter) {
+  static_assert(kInterpolatedGainCurveBeyondKneePoints > 2, "");
+
+  struct Interval {
+    Interval() = default;  // Ctor required by std::priority_queue.
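+    // Note: `operator<` below orders intervals by approximation error, so the
+    // max-heap std::priority_queue used in this function always exposes the
+    // worst-approximated interval at its top.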
+    Interval(double l, double r, double e) : x0(l), x1(r), error(e) {
+      RTC_CHECK(x0 < x1);
+    }
+    bool operator<(const Interval& other) const { return error < other.error; }
+
+    double x0;
+    double x1;
+    double error;
+  };
+
+  std::priority_queue<Interval, std::vector<Interval>> q;
+  q.emplace(limiter->limiter_start_linear(), limiter->max_input_level_linear(),
+            LimiterUnderApproximationNegativeError(
+                limiter, limiter->limiter_start_linear(),
+                limiter->max_input_level_linear()));
+
+  // Iteratively find points by halving the interval with greatest error.
+  while (q.size() < kInterpolatedGainCurveBeyondKneePoints) {
+    // Get the interval with highest error.
+    const auto interval = q.top();
+    q.pop();
+
+    // Split `interval` and enqueue.
+    double x_split = (interval.x0 + interval.x1) / 2.0;
+    q.emplace(interval.x0, x_split,
+              LimiterUnderApproximationNegativeError(limiter, interval.x0,
+                                                     x_split));  // Left.
+    q.emplace(x_split, interval.x1,
+              LimiterUnderApproximationNegativeError(limiter, x_split,
+                                                     interval.x1));  // Right.
+  }
+
+  // Copy x1 values and sort them.
+  RTC_CHECK_EQ(q.size(), kInterpolatedGainCurveBeyondKneePoints);
+  std::vector<double> samples(kInterpolatedGainCurveBeyondKneePoints);
+  for (size_t i = 0; i < kInterpolatedGainCurveBeyondKneePoints; ++i) {
+    const auto interval = q.top();
+    q.pop();
+    samples[i] = interval.x1;
+  }
+  RTC_CHECK(q.empty());
+  std::sort(samples.begin(), samples.end());
+
+  return samples;
+}
+
+// Compute the parameters to over-approximate the knee region via linear
+// interpolation. Over-approximating is saturation-safe since the knee region
+// is convex.
+void PrecomputeKneeApproxParams(const LimiterDbGainCurve* limiter,
+                                test::InterpolatedParameters* parameters) {
+  static_assert(kInterpolatedGainCurveKneePoints > 2, "");
+  // Get `kInterpolatedGainCurveKneePoints` - 1 equally spaced points.
+  const std::vector<double> points = test::LinSpace(
+      limiter->knee_start_linear(), limiter->limiter_start_linear(),
+      kInterpolatedGainCurveKneePoints - 1);
+
+  // Set the first two points. The second is computed to help with the
+  // beginning of the knee region, which has high curvature.
+  parameters->computed_approximation_params_x[0] = points[0];
+  parameters->computed_approximation_params_x[1] =
+      (points[0] + points[1]) / 2.0;
+  // Copy the remaining points.
+  std::copy(std::begin(points) + 1, std::end(points),
+            std::begin(parameters->computed_approximation_params_x) + 2);
+
+  // Compute (m, q) pairs for each linear piece y = mx + q.
+  for (size_t i = 0; i < kInterpolatedGainCurveKneePoints - 1; ++i) {
+    const double x0 = parameters->computed_approximation_params_x[i];
+    const double x1 = parameters->computed_approximation_params_x[i + 1];
+    const double y0 = limiter->GetGainLinear(x0);
+    const double y1 = limiter->GetGainLinear(x1);
+    RTC_CHECK_NE(x1, x0);
+    parameters->computed_approximation_params_m[i] = (y1 - y0) / (x1 - x0);
+    parameters->computed_approximation_params_q[i] =
+        y0 - parameters->computed_approximation_params_m[i] * x0;
+  }
+}
+
+// Compute the parameters to under-approximate the beyond-knee region via
+// linear interpolation and greedy sampling. Under-approximating is
+// saturation-safe since the beyond-knee region is concave.
+void PrecomputeBeyondKneeApproxParams(
+    const LimiterDbGainCurve* limiter,
+    test::InterpolatedParameters* parameters) {
+  // Find points on which the linear pieces are tangent to the gain curve.
+  const auto samples = SampleLimiterRegion(limiter);
+
+  // Parametrize each linear piece.
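+  // The first beyond-knee piece is anchored at the last knee point; each of
+  // the remaining pieces is tangent to the gain curve at one of the greedily
+  // chosen samples.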
+ double m, q; + std::tie(m, q) = ComputeLinearApproximationParams( + limiter, + parameters + ->computed_approximation_params_x[kInterpolatedGainCurveKneePoints - + 1]); + parameters + ->computed_approximation_params_m[kInterpolatedGainCurveKneePoints - 1] = + m; + parameters + ->computed_approximation_params_q[kInterpolatedGainCurveKneePoints - 1] = + q; + for (size_t i = 0; i < samples.size(); ++i) { + std::tie(m, q) = ComputeLinearApproximationParams(limiter, samples[i]); + parameters + ->computed_approximation_params_m[i + + kInterpolatedGainCurveKneePoints] = m; + parameters + ->computed_approximation_params_q[i + + kInterpolatedGainCurveKneePoints] = q; + } + + // Find the point of intersection between adjacent linear pieces. They will be + // used as boundaries between adjacent linear pieces. + for (size_t i = kInterpolatedGainCurveKneePoints; + i < kInterpolatedGainCurveKneePoints + + kInterpolatedGainCurveBeyondKneePoints; + ++i) { + RTC_CHECK_NE(parameters->computed_approximation_params_m[i], + parameters->computed_approximation_params_m[i - 1]); + parameters->computed_approximation_params_x[i] = + ( // Formula: (q0 - q1) / (m1 - m0). + parameters->computed_approximation_params_q[i - 1] - + parameters->computed_approximation_params_q[i]) / + (parameters->computed_approximation_params_m[i] - + parameters->computed_approximation_params_m[i - 1]); + } +} + +} // namespace + +namespace test { + +InterpolatedParameters ComputeInterpolatedGainCurveApproximationParams() { + InterpolatedParameters parameters; + LimiterDbGainCurve limiter; + parameters.computed_approximation_params_x.fill(0.0f); + parameters.computed_approximation_params_m.fill(0.0f); + parameters.computed_approximation_params_q.fill(0.0f); + PrecomputeKneeApproxParams(&limiter, ¶meters); + PrecomputeBeyondKneeApproxParams(&limiter, ¶meters); + return parameters; +} +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h new file mode 100644 index 0000000000..08b676f5fd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_COMPUTE_INTERPOLATED_GAIN_CURVE_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_COMPUTE_INTERPOLATED_GAIN_CURVE_H_ + +#include + +#include "modules/audio_processing/agc2/agc2_common.h" + +namespace webrtc { + +namespace test { + +// Parameters for interpolated gain curve using under-approximation to +// avoid saturation. +// +// The saturation gain is defined in order to let hard-clipping occur for +// those samples having a level that falls in the saturation region. It is an +// upper bound of the actual gain to apply - i.e., that returned by the +// limiter. + +// Knee and beyond-knee regions approximation parameters. +// The gain curve is approximated as a piece-wise linear function. 
+// `approx_params_x_` are the boundaries between adjacent linear pieces, +// `approx_params_m_` and `approx_params_q_` are the slope and the y-intercept +// values of each piece. +struct InterpolatedParameters { + std::array + computed_approximation_params_x; + std::array + computed_approximation_params_m; + std::array + computed_approximation_params_q; +}; + +InterpolatedParameters ComputeInterpolatedGainCurveApproximationParams(); +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_COMPUTE_INTERPOLATED_GAIN_CURVE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc new file mode 100644 index 0000000000..cced7614bc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/cpu_features.h" + +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/system/arch.h" +#include "system_wrappers/include/cpu_features_wrapper.h" + +namespace webrtc { + +std::string AvailableCpuFeatures::ToString() const { + char buf[64]; + rtc::SimpleStringBuilder builder(buf); + bool first = true; + if (sse2) { + builder << (first ? "SSE2" : "_SSE2"); + first = false; + } + if (avx2) { + builder << (first ? "AVX2" : "_AVX2"); + first = false; + } + if (neon) { + builder << (first ? "NEON" : "_NEON"); + first = false; + } + if (first) { + return "none"; + } + return builder.str(); +} + +// Detects available CPU features. +AvailableCpuFeatures GetAvailableCpuFeatures() { +#if defined(WEBRTC_ARCH_X86_FAMILY) + return {/*sse2=*/GetCPUInfo(kSSE2) != 0, + /*avx2=*/GetCPUInfo(kAVX2) != 0, + /*neon=*/false}; +#elif defined(WEBRTC_HAS_NEON) + return {/*sse2=*/false, + /*avx2=*/false, + /*neon=*/true}; +#else + return {/*sse2=*/false, + /*avx2=*/false, + /*neon=*/false}; +#endif +} + +AvailableCpuFeatures NoAvailableCpuFeatures() { + return {/*sse2=*/false, /*avx2=*/false, /*neon=*/false}; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h new file mode 100644 index 0000000000..54ddfb3055 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_CPU_FEATURES_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_CPU_FEATURES_H_ + +#include + +namespace webrtc { + +// Collection of flags indicating which CPU features are available on the +// current platform. True means available. 
+struct AvailableCpuFeatures { + AvailableCpuFeatures(bool sse2, bool avx2, bool neon) + : sse2(sse2), avx2(avx2), neon(neon) {} + // Intel. + bool sse2; + bool avx2; + // ARM. + bool neon; + std::string ToString() const; +}; + +// Detects what CPU features are available. +AvailableCpuFeatures GetAvailableCpuFeatures(); + +// Returns the CPU feature flags all set to false. +AvailableCpuFeatures NoAvailableCpuFeatures(); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_CPU_FEATURES_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build new file mode 100644 index 0000000000..a4572251ad --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + 
DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("cpu_features_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build new file mode 100644 index 0000000000..3a54fc3171 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build @@ -0,0 +1,235 @@ +# This Source 
Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = 
"0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("fixed_digital_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc new file mode 100644 index 0000000000..1995b24913 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kInitialFilterStateLevel = 0.0f;
+
+// Instant attack.
+constexpr float kAttackFilterConstant = 0.0f;
+
+// Limiter decay constant.
+// Computed as `10 ** (-1/20 * subframe_duration / kDecayMs)` where:
+// - `subframe_duration` is `kFrameDurationMs / kSubFramesInFrame`;
+// - `kDecayMs` is defined in agc2_testing_common.h.
+constexpr float kDecayFilterConstant = 0.9971259f;
+
+}  // namespace
+
+FixedDigitalLevelEstimator::FixedDigitalLevelEstimator(
+    int sample_rate_hz,
+    ApmDataDumper* apm_data_dumper)
+    : apm_data_dumper_(apm_data_dumper),
+      filter_state_level_(kInitialFilterStateLevel) {
+  SetSampleRate(sample_rate_hz);
+  CheckParameterCombination();
+  RTC_DCHECK(apm_data_dumper_);
+  apm_data_dumper_->DumpRaw("agc2_level_estimator_samplerate", sample_rate_hz);
+}
+
+void FixedDigitalLevelEstimator::CheckParameterCombination() {
+  RTC_DCHECK_GT(samples_in_frame_, 0);
+  RTC_DCHECK_LE(kSubFramesInFrame, samples_in_frame_);
+  RTC_DCHECK_EQ(samples_in_frame_ % kSubFramesInFrame, 0);
+  RTC_DCHECK_GT(samples_in_sub_frame_, 1);
+}
+
+std::array<float, kSubFramesInFrame> FixedDigitalLevelEstimator::ComputeLevel(
+    const AudioFrameView<const float>& float_frame) {
+  RTC_DCHECK_GT(float_frame.num_channels(), 0);
+  RTC_DCHECK_EQ(float_frame.samples_per_channel(), samples_in_frame_);
+
+  // Compute max envelope without smoothing.
+  std::array<float, kSubFramesInFrame> envelope{};
+  for (int channel_idx = 0; channel_idx < float_frame.num_channels();
+       ++channel_idx) {
+    const auto channel = float_frame.channel(channel_idx);
+    for (int sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
+      for (int sample_in_sub_frame = 0;
+           sample_in_sub_frame < samples_in_sub_frame_;
+           ++sample_in_sub_frame) {
+        envelope[sub_frame] =
+            std::max(envelope[sub_frame],
+                     std::abs(channel[sub_frame * samples_in_sub_frame_ +
+                                      sample_in_sub_frame]));
+      }
+    }
+  }
+
+  // Make sure envelope increases happen one step earlier so that the
+  // corresponding *gain decrease* doesn't miss a sudden signal
+  // increase due to interpolation.
+  for (int sub_frame = 0; sub_frame < kSubFramesInFrame - 1; ++sub_frame) {
+    if (envelope[sub_frame] < envelope[sub_frame + 1]) {
+      envelope[sub_frame] = envelope[sub_frame + 1];
+    }
+  }
+
+  // Add attack / decay smoothing.
+  for (int sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
+    const float envelope_value = envelope[sub_frame];
+    if (envelope_value > filter_state_level_) {
+      envelope[sub_frame] = envelope_value * (1 - kAttackFilterConstant) +
+                            filter_state_level_ * kAttackFilterConstant;
+    } else {
+      envelope[sub_frame] = envelope_value * (1 - kDecayFilterConstant) +
+                            filter_state_level_ * kDecayFilterConstant;
+    }
+    filter_state_level_ = envelope[sub_frame];
+
+    // Dump data for debug.
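+    // Note that only the first channel's samples are dumped here.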
+    RTC_DCHECK(apm_data_dumper_);
+    const auto channel = float_frame.channel(0);
+    apm_data_dumper_->DumpRaw("agc2_level_estimator_samples",
+                              samples_in_sub_frame_,
+                              &channel[sub_frame * samples_in_sub_frame_]);
+    apm_data_dumper_->DumpRaw("agc2_level_estimator_level",
+                              envelope[sub_frame]);
+  }
+
+  return envelope;
+}
+
+void FixedDigitalLevelEstimator::SetSampleRate(int sample_rate_hz) {
+  samples_in_frame_ =
+      rtc::CheckedDivExact(sample_rate_hz * kFrameDurationMs, 1000);
+  samples_in_sub_frame_ =
+      rtc::CheckedDivExact(samples_in_frame_, kSubFramesInFrame);
+  CheckParameterCombination();
+}
+
+void FixedDigitalLevelEstimator::Reset() {
+  filter_state_level_ = kInitialFilterStateLevel;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h
new file mode 100644
index 0000000000..d26b55950c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
+
+#include <array>
+#include <vector>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+// Produces a smooth signal level estimate from an input audio
+// stream. The estimate smoothing is done through exponential
+// filtering.
+class FixedDigitalLevelEstimator {
+ public:
+  // Sample rates are allowed if the number of samples in a frame
+  // (sample_rate_hz * kFrameDurationMs / 1000) is divisible by
+  // kSubFramesInFrame. For kFrameDurationMs=10 and
+  // kSubFramesInFrame=20, this means that sample_rate_hz has to be
+  // divisible by 2000.
+  FixedDigitalLevelEstimator(int sample_rate_hz,
+                             ApmDataDumper* apm_data_dumper);
+
+  FixedDigitalLevelEstimator(const FixedDigitalLevelEstimator&) = delete;
+  FixedDigitalLevelEstimator& operator=(const FixedDigitalLevelEstimator&) =
+      delete;
+
+  // The input is assumed to be in FloatS16 format. Scaled input will
+  // produce similarly scaled output. A frame with kFrameDurationMs ms
+  // of audio produces a level estimate in the same scale. The level
+  // estimate contains kSubFramesInFrame values.
+  std::array<float, kSubFramesInFrame> ComputeLevel(
+      const AudioFrameView<const float>& float_frame);
+
+  // Rate may be changed at any time (but not concurrently) from the
+  // value passed to the constructor. The class is not thread safe.
+  void SetSampleRate(int sample_rate_hz);
+
+  // Resets the level estimator internal state.
+
+  // Resets the level estimator internal state.
+  void Reset();
+
+  float LastAudioLevel() const { return filter_state_level_; }
+
+ private:
+  void CheckParameterCombination();
+
+  ApmDataDumper* const apm_data_dumper_ = nullptr;
+  float filter_state_level_;
+  int samples_in_frame_;
+  int samples_in_sub_frame_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc
new file mode 100644
index 0000000000..97b421d04c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc
@@ -0,0 +1,159 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
+
+#include <limits>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kInputLevel = 10000.f;
+
+// Runs audio at the specified settings through the level estimator and
+// verifies that the output level falls within the given bounds.
+void TestLevelEstimator(int sample_rate_hz,
+                        int num_channels,
+                        float input_level_linear_scale,
+                        float expected_min,
+                        float expected_max) {
+  ApmDataDumper apm_data_dumper(0);
+  FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
+
+  const VectorFloatFrame vectors_with_float_frame(
+      num_channels, rtc::CheckedDivExact(sample_rate_hz, 100),
+      input_level_linear_scale);
+
+  for (int i = 0; i < 500; ++i) {
+    const auto level = level_estimator.ComputeLevel(
+        vectors_with_float_frame.float_frame_view());
+
+    // Give the estimator some time to ramp up.
+    if (i < 50) {
+      continue;
+    }
+
+    for (const auto& x : level) {
+      EXPECT_LE(expected_min, x);
+      EXPECT_LE(x, expected_max);
+    }
+  }
+}
+
+// Returns the time it takes for the level estimator to decrease its level
+// estimate by 'level_reduction_db'.
+float TimeMsToDecreaseLevel(int sample_rate_hz,
+                            int num_channels,
+                            float input_level_db,
+                            float level_reduction_db) {
+  const float input_level = DbfsToFloatS16(input_level_db);
+  RTC_DCHECK_GT(level_reduction_db, 0);
+
+  const VectorFloatFrame vectors_with_float_frame(
+      num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), input_level);
+
+  ApmDataDumper apm_data_dumper(0);
+  FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
+
+  // Give the level estimator plenty of time to ramp up and stabilize.
+  float last_level = 0.f;
+  for (int i = 0; i < 500; ++i) {
+    const auto level_envelope = level_estimator.ComputeLevel(
+        vectors_with_float_frame.float_frame_view());
+    last_level = *level_envelope.rbegin();
+  }
+
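+  // Then feed silence and count sub-frames until the level estimate has
+  // fallen by `level_reduction_db` dB. With the limiter decay of 1 dB per
+  // kDecayMs (see kDecayFilterConstant in the .cc file), this is expected to
+  // take about `level_reduction_db * kDecayMs` ms; the tests below accept a
+  // +/-10% tolerance around that value.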
+  // Set input to 0.
+  VectorFloatFrame vectors_with_zero_float_frame(
+      num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), 0);
+
+  const float reduced_level_linear =
+      DbfsToFloatS16(input_level_db - level_reduction_db);
+  int sub_frames_until_level_reduction = 0;
+  while (last_level > reduced_level_linear) {
+    const auto level_envelope = level_estimator.ComputeLevel(
+        vectors_with_zero_float_frame.float_frame_view());
+    for (const auto& v : level_envelope) {
+      EXPECT_LT(v, last_level);
+      sub_frames_until_level_reduction++;
+      last_level = v;
+      if (last_level <= reduced_level_linear) {
+        break;
+      }
+    }
+  }
+  return static_cast<float>(sub_frames_until_level_reduction) *
+         kFrameDurationMs / kSubFramesInFrame;
+}
+}  // namespace
+
+TEST(GainController2FixedDigitalLevelEstimator, EstimatorShouldNotCrash) {
+  TestLevelEstimator(8000, 1, 0, std::numeric_limits<float>::lowest(),
+                     std::numeric_limits<float>::max());
+}
+
+TEST(GainController2FixedDigitalLevelEstimator,
+     EstimatorShouldEstimateConstantLevel) {
+  TestLevelEstimator(10000, 1, kInputLevel, kInputLevel * 0.99,
+                     kInputLevel * 1.01);
+}
+
+TEST(GainController2FixedDigitalLevelEstimator,
+     EstimatorShouldEstimateConstantLevelForManyChannels) {
+  constexpr size_t num_channels = 10;
+  TestLevelEstimator(20000, num_channels, kInputLevel, kInputLevel * 0.99,
+                     kInputLevel * 1.01);
+}
+
+TEST(GainController2FixedDigitalLevelEstimator, TimeToDecreaseForLowLevel) {
+  constexpr float kLevelReductionDb = 25;
+  constexpr float kInitialLowLevel = -40;
+  constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs;
+
+  const float time_to_decrease =
+      TimeMsToDecreaseLevel(22000, 1, kInitialLowLevel, kLevelReductionDb);
+
+  EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
+  EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
+}
+
+TEST(GainController2FixedDigitalLevelEstimator,
+     TimeToDecreaseForFullScaleLevel) {
+  constexpr float kLevelReductionDb = 25;
+  constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs;
+
+  const float time_to_decrease =
+      TimeMsToDecreaseLevel(26000, 1, 0, kLevelReductionDb);
+
+  EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
+  EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
+}
+
+TEST(GainController2FixedDigitalLevelEstimator,
+     TimeToDecreaseForMultipleChannels) {
+  constexpr float kLevelReductionDb = 25;
+  constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs;
+  constexpr size_t kNumChannels = 10;
+
+  const float time_to_decrease =
+      TimeMsToDecreaseLevel(28000, kNumChannels, 0, kLevelReductionDb);
+
+  EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
+  EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc
new file mode 100644
index 0000000000..f9e276d3a8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc
@@ -0,0 +1,103 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/gain_applier.h"
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+// Returns true when the gain factor is so close to 1 that it would
+// not affect int16 samples.
+bool GainCloseToOne(float gain_factor) {
+  return 1.f - 1.f / kMaxFloatS16Value <= gain_factor &&
+         gain_factor <= 1.f + 1.f / kMaxFloatS16Value;
+}
+
+void ClipSignal(AudioFrameView<float> signal) {
+  for (int k = 0; k < signal.num_channels(); ++k) {
+    rtc::ArrayView<float> channel_view = signal.channel(k);
+    for (auto& sample : channel_view) {
+      sample = rtc::SafeClamp(sample, kMinFloatS16Value, kMaxFloatS16Value);
+    }
+  }
+}
+
+void ApplyGainWithRamping(float last_gain_linear,
+                          float gain_at_end_of_frame_linear,
+                          float inverse_samples_per_channel,
+                          AudioFrameView<float> float_frame) {
+  // Do not modify the signal.
+  if (last_gain_linear == gain_at_end_of_frame_linear &&
+      GainCloseToOne(gain_at_end_of_frame_linear)) {
+    return;
+  }
+
+  // Gain is constant and different from 1.
+  if (last_gain_linear == gain_at_end_of_frame_linear) {
+    for (int k = 0; k < float_frame.num_channels(); ++k) {
+      rtc::ArrayView<float> channel_view = float_frame.channel(k);
+      for (auto& sample : channel_view) {
+        sample *= gain_at_end_of_frame_linear;
+      }
+    }
+    return;
+  }
+
+  // The gain changes. We have to change slowly to avoid discontinuities.
+  const float increment = (gain_at_end_of_frame_linear - last_gain_linear) *
+                          inverse_samples_per_channel;
+  float gain = last_gain_linear;
+  for (int i = 0; i < float_frame.samples_per_channel(); ++i) {
+    for (int ch = 0; ch < float_frame.num_channels(); ++ch) {
+      float_frame.channel(ch)[i] *= gain;
+    }
+    gain += increment;
+  }
+}
+
+}  // namespace
+
+GainApplier::GainApplier(bool hard_clip_samples, float initial_gain_factor)
+    : hard_clip_samples_(hard_clip_samples),
+      last_gain_factor_(initial_gain_factor),
+      current_gain_factor_(initial_gain_factor) {}
+
+void GainApplier::ApplyGain(AudioFrameView<float> signal) {
+  if (static_cast<int>(signal.samples_per_channel()) != samples_per_channel_) {
+    Initialize(signal.samples_per_channel());
+  }
+
+  ApplyGainWithRamping(last_gain_factor_, current_gain_factor_,
+                       inverse_samples_per_channel_, signal);
+
+  last_gain_factor_ = current_gain_factor_;
+
+  if (hard_clip_samples_) {
+    ClipSignal(signal);
+  }
+}
+
+// TODO(bugs.webrtc.org/7494): Remove once switched to gains in dB.
+void GainApplier::SetGainFactor(float gain_factor) {
+  RTC_DCHECK_GT(gain_factor, 0.f);
+  current_gain_factor_ = gain_factor;
+}
+
+void GainApplier::Initialize(int samples_per_channel) {
+  RTC_DCHECK_GT(samples_per_channel, 0);
+  samples_per_channel_ = static_cast<int>(samples_per_channel);
+  inverse_samples_per_channel_ = 1.f / samples_per_channel_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h
new file mode 100644
index 0000000000..ba8a4a4cd2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_GAIN_APPLIER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_GAIN_APPLIER_H_
+
+#include <stddef.h>
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+class GainApplier {
+ public:
+  GainApplier(bool hard_clip_samples, float initial_gain_factor);
+
+  void ApplyGain(AudioFrameView<float> signal);
+  void SetGainFactor(float gain_factor);
+  float GetGainFactor() const { return current_gain_factor_; }
+
+ private:
+  void Initialize(int samples_per_channel);
+
+  // Whether to clip samples after gain is applied. If 'true', result
+  // will fit in FloatS16 range.
+  const bool hard_clip_samples_;
+  float last_gain_factor_;
+
+  // If this value is not equal to 'last_gain_factor_', gain will be
+  // ramped from 'last_gain_factor_' to this value during the next
+  // 'ApplyGain'.
+  float current_gain_factor_;
+  int samples_per_channel_ = -1;
+  float inverse_samples_per_channel_ = -1.f;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_GAIN_APPLIER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build
new file mode 100644
index 0000000000..394aa109fa
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+     "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] ==
"Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + 
DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+Library("gain_applier_gn")
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc
new file mode 100644
index 0000000000..3296345e62
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/gain_applier.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <limits>
+
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+TEST(AutomaticGainController2GainApplier, InitialGainIsRespected) {
+  constexpr float initial_signal_level = 123.f;
+  constexpr float gain_factor = 10.f;
+  VectorFloatFrame fake_audio(1, 1, initial_signal_level);
+  GainApplier gain_applier(true, gain_factor);
+
+  gain_applier.ApplyGain(fake_audio.float_frame_view());
+  EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0],
+              initial_signal_level * gain_factor, 0.1f);
+}
+
+TEST(AutomaticGainController2GainApplier, ClippingIsDone) {
+  constexpr float initial_signal_level = 30000.f;
+  constexpr float gain_factor = 10.f;
+  VectorFloatFrame fake_audio(1, 1, initial_signal_level);
+  GainApplier gain_applier(true, gain_factor);
+
+  gain_applier.ApplyGain(fake_audio.float_frame_view());
+  EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0],
+              std::numeric_limits<int16_t>::max(), 0.1f);
+}
+
+TEST(AutomaticGainController2GainApplier, ClippingIsNotDone) {
+  constexpr float initial_signal_level = 30000.f;
+  constexpr float gain_factor = 10.f;
+  VectorFloatFrame fake_audio(1, 1, initial_signal_level);
+  GainApplier gain_applier(false, gain_factor);
+
+  gain_applier.ApplyGain(fake_audio.float_frame_view());
+
+  EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0],
+              initial_signal_level * gain_factor, 0.1f);
+}
+
+TEST(AutomaticGainController2GainApplier, RampingIsDone) {
+  constexpr float initial_signal_level = 30000.f;
+  constexpr float initial_gain_factor = 1.f;
+  constexpr float target_gain_factor = 0.5f;
+  constexpr int num_channels = 3;
+  constexpr int samples_per_channel = 4;
+  VectorFloatFrame fake_audio(num_channels, samples_per_channel,
+                              initial_signal_level);
+  GainApplier gain_applier(false, initial_gain_factor);
+
+  gain_applier.SetGainFactor(target_gain_factor);
+  gain_applier.ApplyGain(fake_audio.float_frame_view());
+
+  // The maximal gain change should be close to that in linear interpolation.
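+  // With the constants above the gain ramps from 1.0 down to 0.5 across the
+  // 4 samples of the frame, so the applied per-sample gains are 1.0, 0.875,
+  // 0.75 and 0.625, and consecutive output samples should differ by roughly
+  // |1.0 - 0.5| * 30000 / 4 = 3750, which is what the loop below checks.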
+ for (size_t channel = 0; channel < num_channels; ++channel) { + float max_signal_change = 0.f; + float last_signal_level = initial_signal_level; + for (const auto sample : fake_audio.float_frame_view().channel(channel)) { + const float current_change = fabs(last_signal_level - sample); + max_signal_change = std::max(max_signal_change, current_change); + last_signal_level = sample; + } + const float total_gain_change = + fabs((initial_gain_factor - target_gain_factor) * initial_signal_level); + EXPECT_NEAR(max_signal_change, total_gain_change / samples_per_channel, + 0.1f); + } + + // Next frame should have the desired level. + VectorFloatFrame next_fake_audio_frame(num_channels, samples_per_channel, + initial_signal_level); + gain_applier.ApplyGain(next_fake_audio_frame.float_frame_view()); + + // The last sample should have the new gain. + EXPECT_NEAR(next_fake_audio_frame.float_frame_view().channel(0)[0], + initial_signal_level * target_gain_factor, 0.1f); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_gn/moz.build new file mode 100644 index 0000000000..5b1d68415a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + 
DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("gain_map_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_internal.h b/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_internal.h new file mode 100644 index 0000000000..7c669fc9dd --- 
/dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_map_internal.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_GAIN_MAP_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_GAIN_MAP_INTERNAL_H_ + +namespace webrtc { + +static constexpr int kGainMapSize = 256; +// Maps input volumes, which are values in the [0, 255] range, to gains in dB. +// The values below are generated with numpy as follows: +// SI = 2 # Initial slope. +// SF = 0.25 # Final slope. +// D = 8/256 # Quantization factor. +// x = np.linspace(0, 255, 256) # Input volumes. +// y = (SF * x + (SI - SF) * (1 - np.exp(-D*x)) / D - 56).round() +static const int kGainMap[kGainMapSize] = { + -56, -54, -52, -50, -48, -47, -45, -43, -42, -40, -38, -37, -35, -34, -33, + -31, -30, -29, -27, -26, -25, -24, -23, -22, -20, -19, -18, -17, -16, -15, + -14, -14, -13, -12, -11, -10, -9, -8, -8, -7, -6, -5, -5, -4, -3, + -2, -2, -1, 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + 13, 14, 14, 15, 15, 15, 16, 16, 17, 17, 17, 18, 18, 18, 19, + 19, 19, 20, 20, 21, 21, 21, 22, 22, 22, 23, 23, 23, 24, 24, + 24, 24, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 28, + 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 32, 32, 32, 32, 33, + 33, 33, 33, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, + 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, 40, 40, 40, 40, 41, + 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 44, 44, 44, 44, 45, + 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, 48, 48, 48, 48, + 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, 52, 52, 52, + 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, 56, 56, + 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60, + 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, + 64}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_GAIN_MAP_INTERNAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc new file mode 100644 index 0000000000..bcc650fb3e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc @@ -0,0 +1,580 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/input_volume_controller.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/gain_map_internal.h"
+#include "modules/audio_processing/agc2/input_volume_stats_reporter.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/field_trial.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+namespace {
+
+// Amount of error we tolerate in the microphone input volume (presumably due
+// to OS quantization) before we assume the user has manually adjusted the
+// volume.
+constexpr int kVolumeQuantizationSlack = 25;
+
+constexpr int kMaxInputVolume = 255;
+static_assert(kGainMapSize > kMaxInputVolume, "gain map too small");
+
+// Maximum absolute RMS error.
+constexpr int KMaxAbsRmsErrorDbfs = 15;
+static_assert(KMaxAbsRmsErrorDbfs > 0, "");
+
+using Agc1ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor;
+
+// TODO(webrtc:7494): Hardcode clipping predictor parameters and remove this
+// function once it is no longer needed in the ctor.
+Agc1ClippingPredictorConfig CreateClippingPredictorConfig(bool enabled) {
+  Agc1ClippingPredictorConfig config;
+  config.enabled = enabled;
+
+  return config;
+}
+
+// Returns an input volume in the [`min_input_volume`, `kMaxInputVolume`] range
+// that reduces `gain_error_db`, which is a gain error estimated when
+// `input_volume` was applied, according to a fixed gain map.
+int ComputeVolumeUpdate(int gain_error_db,
+                        int input_volume,
+                        int min_input_volume) {
+  RTC_DCHECK_GE(input_volume, 0);
+  RTC_DCHECK_LE(input_volume, kMaxInputVolume);
+  if (gain_error_db == 0) {
+    return input_volume;
+  }
+
+  int new_volume = input_volume;
+  if (gain_error_db > 0) {
+    while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db &&
+           new_volume < kMaxInputVolume) {
+      ++new_volume;
+    }
+  } else {
+    while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db &&
+           new_volume > min_input_volume) {
+      --new_volume;
+    }
+  }
+  return new_volume;
+}
+
+// Returns the proportion of samples in the buffer which are at full-scale
+// (and presumably clipped).
+float ComputeClippedRatio(const float* const* audio,
+                          size_t num_channels,
+                          size_t samples_per_channel) {
+  RTC_DCHECK_GT(samples_per_channel, 0);
+  int num_clipped = 0;
+  for (size_t ch = 0; ch < num_channels; ++ch) {
+    int num_clipped_in_ch = 0;
+    for (size_t i = 0; i < samples_per_channel; ++i) {
+      RTC_DCHECK(audio[ch]);
+      if (audio[ch][i] >= 32767.0f || audio[ch][i] <= -32768.0f) {
+        ++num_clipped_in_ch;
+      }
+    }
+    num_clipped = std::max(num_clipped, num_clipped_in_ch);
+  }
+  return static_cast<float>(num_clipped) / (samples_per_channel);
+}
+
+void LogClippingMetrics(int clipping_rate) {
+  RTC_LOG(LS_INFO) << "[AGC2] Input clipping rate: " << clipping_rate << "%";
+  RTC_HISTOGRAM_COUNTS_LINEAR(/*name=*/"WebRTC.Audio.Agc.InputClippingRate",
+                              /*sample=*/clipping_rate, /*min=*/0, /*max=*/100,
+                              /*bucket_count=*/50);
+}
+
+// Compares `speech_level_dbfs` to the [`target_range_min_dbfs`,
+// `target_range_max_dbfs`] range and returns the error to be compensated via
+// input volume adjustment. Returns a positive value when the level is below
+// the range, a negative value when the level is above the range, zero
+// otherwise.
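+// For example, with the default target range [-50, -30] dBFS, a speech level
+// of -62 dBFS yields round(-50 - (-62)) = +12 dB (raise the input volume),
+// -18 dBFS yields round(-30 - (-18)) = -12 dB (lower it), and -40 dBFS yields
+// 0 (no adjustment).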
+int GetSpeechLevelRmsErrorDb(float speech_level_dbfs,
+                             int target_range_min_dbfs,
+                             int target_range_max_dbfs) {
+  constexpr float kMinSpeechLevelDbfs = -90.0f;
+  constexpr float kMaxSpeechLevelDbfs = 30.0f;
+  RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs);
+  RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs);
+  speech_level_dbfs = rtc::SafeClamp(
+      speech_level_dbfs, kMinSpeechLevelDbfs, kMaxSpeechLevelDbfs);
+
+  int rms_error_db = 0;
+  if (speech_level_dbfs > target_range_max_dbfs) {
+    rms_error_db = std::round(target_range_max_dbfs - speech_level_dbfs);
+  } else if (speech_level_dbfs < target_range_min_dbfs) {
+    rms_error_db = std::round(target_range_min_dbfs - speech_level_dbfs);
+  }
+
+  return rms_error_db;
+}
+
+}  // namespace
+
+MonoInputVolumeController::MonoInputVolumeController(
+    int min_input_volume_after_clipping,
+    int min_input_volume,
+    int update_input_volume_wait_frames,
+    float speech_probability_threshold,
+    float speech_ratio_threshold)
+    : min_input_volume_(min_input_volume),
+      min_input_volume_after_clipping_(min_input_volume_after_clipping),
+      max_input_volume_(kMaxInputVolume),
+      update_input_volume_wait_frames_(
+          std::max(update_input_volume_wait_frames, 1)),
+      speech_probability_threshold_(speech_probability_threshold),
+      speech_ratio_threshold_(speech_ratio_threshold) {
+  RTC_DCHECK_GE(min_input_volume_, 0);
+  RTC_DCHECK_LE(min_input_volume_, 255);
+  RTC_DCHECK_GE(min_input_volume_after_clipping_, 0);
+  RTC_DCHECK_LE(min_input_volume_after_clipping_, 255);
+  RTC_DCHECK_GE(max_input_volume_, 0);
+  RTC_DCHECK_LE(max_input_volume_, 255);
+  RTC_DCHECK_GE(update_input_volume_wait_frames_, 0);
+  RTC_DCHECK_GE(speech_probability_threshold_, 0.0f);
+  RTC_DCHECK_LE(speech_probability_threshold_, 1.0f);
+  RTC_DCHECK_GE(speech_ratio_threshold_, 0.0f);
+  RTC_DCHECK_LE(speech_ratio_threshold_, 1.0f);
+}
+
+MonoInputVolumeController::~MonoInputVolumeController() = default;
+
+void MonoInputVolumeController::Initialize() {
+  max_input_volume_ = kMaxInputVolume;
+  capture_output_used_ = true;
+  check_volume_on_next_process_ = true;
+  frames_since_update_input_volume_ = 0;
+  speech_frames_since_update_input_volume_ = 0;
+  is_first_frame_ = true;
+}
+
+// A speech segment is considered active if at least
+// `update_input_volume_wait_frames_` new frames have been processed since the
+// previous update and the ratio of non-silence frames (i.e., frames with a
+// `speech_probability` higher than `speech_probability_threshold_`) is at
+// least `speech_ratio_threshold_`.
+void MonoInputVolumeController::Process(absl::optional<int> rms_error_db,
+                                        float speech_probability) {
+  if (check_volume_on_next_process_) {
+    check_volume_on_next_process_ = false;
+    // We have to wait until the first process call to check the volume,
+    // because Chromium doesn't guarantee it to be valid any earlier.
+    CheckVolumeAndReset();
+  }
+
+  // Count frames with a high speech probability as speech.
+  if (speech_probability >= speech_probability_threshold_) {
+    ++speech_frames_since_update_input_volume_;
+  }
+
+  // Reset the counters and maybe update the input volume.
+  if (++frames_since_update_input_volume_ >=
+      update_input_volume_wait_frames_) {
+    const float speech_ratio =
+        static_cast<float>(speech_frames_since_update_input_volume_) /
+        static_cast<float>(update_input_volume_wait_frames_);
+
+    // Always reset the counters regardless of whether the volume changes or
+    // not.
+    frames_since_update_input_volume_ = 0;
+    speech_frames_since_update_input_volume_ = 0;
+
+    // Update the input volume if allowed.
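+    // That is: this is not the very first frame, at least a fraction
+    // `speech_ratio_threshold_` of the frames since the previous update were
+    // speech, and an RMS error estimate is available.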
+ if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_ && + rms_error_db.has_value()) { + UpdateInputVolume(*rms_error_db); + } + } + + is_first_frame_ = false; +} + +void MonoInputVolumeController::HandleClipping(int clipped_level_step) { + RTC_DCHECK_GT(clipped_level_step, 0); + // Always decrease the maximum input volume, even if the current input volume + // is below threshold. + SetMaxLevel(std::max(min_input_volume_after_clipping_, + max_input_volume_ - clipped_level_step)); + if (log_to_histograms_) { + RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed", + last_recommended_input_volume_ - clipped_level_step >= + min_input_volume_after_clipping_); + } + if (last_recommended_input_volume_ > min_input_volume_after_clipping_) { + // Don't try to adjust the input volume if we're already below the limit. As + // a consequence, if the user has brought the input volume above the limit, + // we will still not react until the postproc updates the input volume. + SetInputVolume( + std::max(min_input_volume_after_clipping_, + last_recommended_input_volume_ - clipped_level_step)); + frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; + is_first_frame_ = false; + } +} + +void MonoInputVolumeController::SetInputVolume(int new_volume) { + int applied_input_volume = recommended_input_volume_; + if (applied_input_volume == 0) { + RTC_DLOG(LS_INFO) + << "[AGC2] The applied input volume is zero, taking no action."; + return; + } + if (applied_input_volume < 0 || applied_input_volume > kMaxInputVolume) { + RTC_LOG(LS_ERROR) << "[AGC2] Invalid value for the applied input volume: " + << applied_input_volume; + return; + } + + // Detect manual input volume adjustments by checking if the + // `applied_input_volume` is outside of the `[last_recommended_input_volume_ - + // kVolumeQuantizationSlack, last_recommended_input_volume_ + + // kVolumeQuantizationSlack]` range. + if (applied_input_volume > + last_recommended_input_volume_ + kVolumeQuantizationSlack || + applied_input_volume < + last_recommended_input_volume_ - kVolumeQuantizationSlack) { + RTC_DLOG(LS_INFO) + << "[AGC2] The input volume was manually adjusted. Updating " + "stored input volume from " + << last_recommended_input_volume_ << " to " << applied_input_volume; + last_recommended_input_volume_ = applied_input_volume; + // Always allow the user to increase the volume. + if (last_recommended_input_volume_ > max_input_volume_) { + SetMaxLevel(last_recommended_input_volume_); + } + // Take no action in this case, since we can't be sure when the volume + // was manually adjusted. 
+ frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; + is_first_frame_ = false; + return; + } + + new_volume = std::min(new_volume, max_input_volume_); + if (new_volume == last_recommended_input_volume_) { + return; + } + + recommended_input_volume_ = new_volume; + RTC_DLOG(LS_INFO) << "[AGC2] Applied input volume: " << applied_input_volume + << " | last recommended input volume: " + << last_recommended_input_volume_ + << " | newly recommended input volume: " << new_volume; + last_recommended_input_volume_ = new_volume; +} + +void MonoInputVolumeController::SetMaxLevel(int input_volume) { + RTC_DCHECK_GE(input_volume, min_input_volume_after_clipping_); + max_input_volume_ = input_volume; + RTC_DLOG(LS_INFO) << "[AGC2] Maximum input volume updated: " + << max_input_volume_; +} + +void MonoInputVolumeController::HandleCaptureOutputUsedChange( + bool capture_output_used) { + if (capture_output_used_ == capture_output_used) { + return; + } + capture_output_used_ = capture_output_used; + + if (capture_output_used) { + // When we start using the output, we should reset things to be safe. + check_volume_on_next_process_ = true; + } +} + +int MonoInputVolumeController::CheckVolumeAndReset() { + int input_volume = recommended_input_volume_; + // Reasons for taking action at startup: + // 1) A person starting a call is expected to be heard. + // 2) Independent of interpretation of `input_volume` == 0 we should raise it + // so the AGC can do its job properly. + if (input_volume == 0 && !startup_) { + RTC_DLOG(LS_INFO) + << "[AGC2] The applied input volume is zero, taking no action."; + return 0; + } + if (input_volume < 0 || input_volume > kMaxInputVolume) { + RTC_LOG(LS_ERROR) << "[AGC2] Invalid value for the applied input volume: " + << input_volume; + return -1; + } + RTC_DLOG(LS_INFO) << "[AGC2] Initial input volume: " << input_volume; + + if (input_volume < min_input_volume_) { + input_volume = min_input_volume_; + RTC_DLOG(LS_INFO) + << "[AGC2] The initial input volume is too low, raising to " + << input_volume; + recommended_input_volume_ = input_volume; + } + + last_recommended_input_volume_ = input_volume; + startup_ = false; + frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; + is_first_frame_ = true; + + return 0; +} + +void MonoInputVolumeController::UpdateInputVolume(int rms_error_db) { + RTC_DLOG(LS_INFO) << "[AGC2] RMS error: " << rms_error_db << " dB"; + // Prevent too large microphone input volume changes by clamping the RMS + // error. 
+  rms_error_db =
+      rtc::SafeClamp(rms_error_db, -KMaxAbsRmsErrorDbfs, KMaxAbsRmsErrorDbfs);
+  if (rms_error_db == 0) {
+    return;
+  }
+  SetInputVolume(ComputeVolumeUpdate(
+      rms_error_db, last_recommended_input_volume_, min_input_volume_));
+}
+
+InputVolumeController::InputVolumeController(int num_capture_channels,
+                                             const Config& config)
+    : num_capture_channels_(num_capture_channels),
+      min_input_volume_(config.min_input_volume),
+      capture_output_used_(true),
+      clipped_level_step_(config.clipped_level_step),
+      clipped_ratio_threshold_(config.clipped_ratio_threshold),
+      clipped_wait_frames_(config.clipped_wait_frames),
+      clipping_predictor_(CreateClippingPredictor(
+          num_capture_channels,
+          CreateClippingPredictorConfig(config.enable_clipping_predictor))),
+      use_clipping_predictor_step_(
+          !!clipping_predictor_ &&
+          CreateClippingPredictorConfig(config.enable_clipping_predictor)
+              .use_predicted_step),
+      frames_since_clipped_(config.clipped_wait_frames),
+      clipping_rate_log_counter_(0),
+      clipping_rate_log_(0.0f),
+      target_range_max_dbfs_(config.target_range_max_dbfs),
+      target_range_min_dbfs_(config.target_range_min_dbfs),
+      channel_controllers_(num_capture_channels) {
+  RTC_LOG(LS_INFO)
+      << "[AGC2] Input volume controller enabled. Minimum input volume: "
+      << min_input_volume_;
+
+  for (auto& controller : channel_controllers_) {
+    controller = std::make_unique<MonoInputVolumeController>(
+        config.clipped_level_min, min_input_volume_,
+        config.update_input_volume_wait_frames,
+        config.speech_probability_threshold, config.speech_ratio_threshold);
+  }
+
+  RTC_DCHECK(!channel_controllers_.empty());
+  RTC_DCHECK_GT(clipped_level_step_, 0);
+  RTC_DCHECK_LE(clipped_level_step_, 255);
+  RTC_DCHECK_GT(clipped_ratio_threshold_, 0.0f);
+  RTC_DCHECK_LT(clipped_ratio_threshold_, 1.0f);
+  RTC_DCHECK_GT(clipped_wait_frames_, 0);
+  channel_controllers_[0]->ActivateLogging();
+}
+
+InputVolumeController::~InputVolumeController() {}
+
+void InputVolumeController::Initialize() {
+  for (auto& controller : channel_controllers_) {
+    controller->Initialize();
+  }
+  capture_output_used_ = true;
+
+  AggregateChannelLevels();
+  clipping_rate_log_ = 0.0f;
+  clipping_rate_log_counter_ = 0;
+
+  applied_input_volume_ = absl::nullopt;
+}
+
+void InputVolumeController::AnalyzeInputAudio(int applied_input_volume,
+                                              const AudioBuffer& audio_buffer) {
+  RTC_DCHECK_GE(applied_input_volume, 0);
+  RTC_DCHECK_LE(applied_input_volume, 255);
+
+  SetAppliedInputVolume(applied_input_volume);
+
+  RTC_DCHECK_EQ(audio_buffer.num_channels(), channel_controllers_.size());
+  const float* const* audio = audio_buffer.channels_const();
+  size_t samples_per_channel = audio_buffer.num_frames();
+  RTC_DCHECK(audio);
+
+  AggregateChannelLevels();
+  if (!capture_output_used_) {
+    return;
+  }
+
+  if (!!clipping_predictor_) {
+    AudioFrameView<const float> frame = AudioFrameView<const float>(
+        audio, num_capture_channels_, static_cast<int>(samples_per_channel));
+    clipping_predictor_->Analyze(frame);
+  }
+
+  // Check for clipped samples. We do this in the preprocessing phase in order
+  // to catch clipped echo as well.
+  //
+  // If we find a sufficiently clipped frame, drop the current microphone
+  // input volume and enforce a new maximum input volume, lowered by the same
+  // amount from the current maximum. This harsh treatment is an effort to
+  // avoid repeated clipped echo events.
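+  // E.g. with the default `clipped_level_step` of 15 and a maximum input
+  // volume of 255, the first clipping event lowers the recommended volume by
+  // 15 and caps the maximum at 240; repeated events keep lowering the cap
+  // until `clipped_level_min` is reached.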
+  float clipped_ratio =
+      ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel);
+  clipping_rate_log_ = std::max(clipped_ratio, clipping_rate_log_);
+  clipping_rate_log_counter_++;
+  constexpr int kNumFramesIn30Seconds = 3000;
+  if (clipping_rate_log_counter_ == kNumFramesIn30Seconds) {
+    LogClippingMetrics(std::round(100.0f * clipping_rate_log_));
+    clipping_rate_log_ = 0.0f;
+    clipping_rate_log_counter_ = 0;
+  }
+
+  if (frames_since_clipped_ < clipped_wait_frames_) {
+    ++frames_since_clipped_;
+    return;
+  }
+
+  const bool clipping_detected = clipped_ratio > clipped_ratio_threshold_;
+  bool clipping_predicted = false;
+  int predicted_step = 0;
+  if (!!clipping_predictor_) {
+    for (int channel = 0; channel < num_capture_channels_; ++channel) {
+      const auto step = clipping_predictor_->EstimateClippedLevelStep(
+          channel, recommended_input_volume_, clipped_level_step_,
+          channel_controllers_[channel]->min_input_volume_after_clipping(),
+          kMaxInputVolume);
+      if (step.has_value()) {
+        predicted_step = std::max(predicted_step, step.value());
+        clipping_predicted = true;
+      }
+    }
+  }
+
+  if (clipping_detected) {
+    RTC_DLOG(LS_INFO) << "[AGC2] Clipping detected (ratio: " << clipped_ratio
+                      << ")";
+  }
+
+  int step = clipped_level_step_;
+  if (clipping_predicted) {
+    predicted_step = std::max(predicted_step, clipped_level_step_);
+    RTC_DLOG(LS_INFO) << "[AGC2] Clipping predicted (volume down step: "
+                      << predicted_step << ")";
+    if (use_clipping_predictor_step_) {
+      step = predicted_step;
+    }
+  }
+
+  if (clipping_detected ||
+      (clipping_predicted && use_clipping_predictor_step_)) {
+    for (auto& state_ch : channel_controllers_) {
+      state_ch->HandleClipping(step);
+    }
+    frames_since_clipped_ = 0;
+    if (!!clipping_predictor_) {
+      clipping_predictor_->Reset();
+    }
+  }
+
+  AggregateChannelLevels();
+}
+
+absl::optional<int> InputVolumeController::RecommendInputVolume(
+    float speech_probability,
+    absl::optional<float> speech_level_dbfs) {
+  // Only process if the applied input volume is set.
+  if (!applied_input_volume_.has_value()) {
+    RTC_LOG(LS_ERROR) << "[AGC2] Applied input volume not set.";
+    return absl::nullopt;
+  }
+
+  AggregateChannelLevels();
+  const int volume_after_clipping_handling = recommended_input_volume_;
+
+  if (!capture_output_used_) {
+    return applied_input_volume_;
+  }
+
+  absl::optional<int> rms_error_db;
+  if (speech_level_dbfs.has_value()) {
+    // Compute the error for all frames (both speech and non-speech frames).
+    rms_error_db = GetSpeechLevelRmsErrorDb(
+        *speech_level_dbfs, target_range_min_dbfs_, target_range_max_dbfs_);
+  }
+
+  for (auto& controller : channel_controllers_) {
+    controller->Process(rms_error_db, speech_probability);
+  }
+
+  AggregateChannelLevels();
+  if (volume_after_clipping_handling != recommended_input_volume_) {
+    // The recommended input volume was adjusted in order to match the target
+    // level.
+    UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(
+        recommended_input_volume_);
+  }
+
+  applied_input_volume_ = absl::nullopt;
+  return recommended_input_volume();
+}
+
+void InputVolumeController::HandleCaptureOutputUsedChange(
+    bool capture_output_used) {
+  for (auto& controller : channel_controllers_) {
+    controller->HandleCaptureOutputUsedChange(capture_output_used);
+  }
+
+  capture_output_used_ = capture_output_used;
+}
+
+void InputVolumeController::SetAppliedInputVolume(int input_volume) {
+  applied_input_volume_ = input_volume;
+
+  for (auto& controller : channel_controllers_) {
+    controller->set_stream_analog_level(input_volume);
+  }
+
+  AggregateChannelLevels();
+}
+
+void InputVolumeController::AggregateChannelLevels() {
+  int new_recommended_input_volume =
+      channel_controllers_[0]->recommended_analog_level();
+  channel_controlling_gain_ = 0;
+  for (size_t ch = 1; ch < channel_controllers_.size(); ++ch) {
+    int input_volume = channel_controllers_[ch]->recommended_analog_level();
+    if (input_volume < new_recommended_input_volume) {
+      new_recommended_input_volume = input_volume;
+      channel_controlling_gain_ = static_cast<int>(ch);
+    }
+  }
+
+  // Enforce the minimum input volume when a recommendation is made.
+  if (applied_input_volume_.has_value() && *applied_input_volume_ > 0) {
+    new_recommended_input_volume =
+        std::max(new_recommended_input_volume, min_input_volume_);
+  }
+
+  recommended_input_volume_ = new_recommended_input_volume;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.h b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.h
new file mode 100644
index 0000000000..21405542dc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.h
@@ -0,0 +1,282 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/clipping_predictor.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "rtc_base/gtest_prod_util.h"
+
+namespace webrtc {
+
+class MonoInputVolumeController;
+
+// The input volume controller recommends what volume to use, handles volume
+// changes and clipping detection and prediction. In particular, it handles
+// changes triggered by the user (e.g., volume set to zero by a HW mute
+// button). This class is not thread-safe.
+// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
+// convention.
+class InputVolumeController final {
+ public:
+  // Config for the constructor.
+  struct Config {
+    // Minimum input volume that can be recommended. Not enforced when the
+    // applied input volume is zero outside startup.
+    int min_input_volume = 20;
+    // Lowest input volume level that will be applied in response to clipping.
+    int clipped_level_min = 70;
+    // Amount the input volume is lowered with every clipping event. Limited
+    // to (0, 255].
+    int clipped_level_step = 15;
+    // Proportion of clipped samples required to declare a clipping event.
+    // Limited to (0.0f, 1.0f).
+    float clipped_ratio_threshold = 0.1f;
+    // Time in frames to wait after a clipping event before checking again.
+    // Limited to values higher than 0.
+    int clipped_wait_frames = 300;
+    // Enables clipping prediction functionality.
+    bool enable_clipping_predictor = false;
+    // Speech level target range (dBFS). If the speech level is in the range
+    // [`target_range_min_dbfs`, `target_range_max_dbfs`], no input volume
+    // adjustments are done based on the speech level. For speech levels below
+    // and above the range, the targets `target_range_min_dbfs` and
+    // `target_range_max_dbfs` are used, respectively.
+    int target_range_max_dbfs = -30;
+    int target_range_min_dbfs = -50;
+    // Number of wait frames between the recommended input volume updates.
+    int update_input_volume_wait_frames = 100;
+    // Speech probability threshold: speech probabilities below the threshold
+    // are considered silence. Limited to [0.0f, 1.0f].
+    float speech_probability_threshold = 0.7f;
+    // Minimum speech frame ratio for volume updates to be allowed. Limited
+    // to [0.0f, 1.0f].
+    float speech_ratio_threshold = 0.6f;
+  };
+
+  // Ctor. `num_capture_channels` specifies the number of channels for the
+  // audio passed to `AnalyzeInputAudio()` and `RecommendInputVolume()`.
+  InputVolumeController(int num_capture_channels, const Config& config);
+
+  ~InputVolumeController();
+  InputVolumeController(const InputVolumeController&) = delete;
+  InputVolumeController& operator=(const InputVolumeController&) = delete;
+
+  // TODO(webrtc:7494): Integrate initialization into ctor and remove.
+  void Initialize();
+
+  // Analyzes `audio_buffer` before `RecommendInputVolume()` is called so that
+  // the analysis can be performed before digital processing operations take
+  // place (e.g., echo cancellation). The analysis consists of input clipping
+  // detection and prediction (if enabled).
+  void AnalyzeInputAudio(int applied_input_volume,
+                         const AudioBuffer& audio_buffer);
+
+  // Adjusts the recommended input volume upwards/downwards based on the
+  // result of `AnalyzeInputAudio()` and on `speech_level_dbfs` (if specified).
+  // Must be called after `AnalyzeInputAudio()`. The value of
+  // `speech_probability` is expected to be in the range [0, 1] and
+  // `speech_level_dbfs` in the range [-90, 30]; both should be estimated
+  // after echo cancellation and noise suppression are applied. Returns a
+  // non-empty input volume recommendation if available. If
+  // `capture_output_used_` is false, returns the applied input volume.
+  absl::optional<int> RecommendInputVolume(
+      float speech_probability,
+      absl::optional<float> speech_level_dbfs);
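+  // Typical usage per capture frame (a sketch; `ApplyMicVolume()` stands in
+  // for a platform-specific volume setter and is not part of this API):
+  //   controller.AnalyzeInputAudio(applied_volume, audio_buffer);
+  //   // ... echo cancellation, noise suppression, speech level estimation.
+  //   absl::optional<int> volume = controller.RecommendInputVolume(
+  //       speech_probability, speech_level_dbfs);
+  //   if (volume.has_value()) ApplyMicVolume(*volume);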
+  // Stores whether the capture output will be used or not. Call when the
+  // capture stream output has been flagged to be used/not-used. If unused,
+  // the controller disregards all incoming audio.
+  void HandleCaptureOutputUsedChange(bool capture_output_used);
+
+  // Returns true if clipping prediction is enabled.
+  // TODO(bugs.webrtc.org/7494): Deprecate this method.
+  bool clipping_predictor_enabled() const { return !!clipping_predictor_; }
+
+  // Returns true if clipping prediction is used to adjust the input volume.
+  // TODO(bugs.webrtc.org/7494): Deprecate this method.
+  bool use_clipping_predictor_step() const {
+    return use_clipping_predictor_step_;
+  }
+
+  // Only use for testing: use `RecommendInputVolume()` elsewhere.
+  // Returns the value of a member variable, needed for testing
+  // `AnalyzeInputAudio()`.
+  int recommended_input_volume() const { return recommended_input_volume_; }
+
+  // Only use for testing.
+  bool capture_output_used() const { return capture_output_used_; }
+
+ private:
+  friend class InputVolumeControllerTestHelper;
+
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeDefault);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeDisabled);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest,
+                           MinInputVolumeOutOfRangeAbove);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest,
+                           MinInputVolumeOutOfRangeBelow);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerTest, MinInputVolumeEnabled50);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeControllerParametrizedTest,
+                           ClippingParametersVerified);
+
+  // Sets the applied input volume and resets the recommended input volume.
+  void SetAppliedInputVolume(int level);
+
+  void AggregateChannelLevels();
+
+  const int num_capture_channels_;
+
+  // Minimum input volume that can be recommended.
+  const int min_input_volume_;
+
+  // TODO(bugs.webrtc.org/7494): Once
+  // `AudioProcessingImpl::recommended_stream_analog_level()` becomes a
+  // trivial getter, leave uninitialized.
+  // Recommended input volume. After `SetAppliedInputVolume()` is called it
+  // holds the observed input volume. Possibly updated by
+  // `AnalyzeInputAudio()` and `RecommendInputVolume()`; after these calls,
+  // holds the recommended input volume.
+  int recommended_input_volume_ = 0;
+  // Applied input volume. After `SetAppliedInputVolume()` is called it holds
+  // the current applied volume.
+  absl::optional<int> applied_input_volume_;
+
+  bool capture_output_used_;
+
+  // Clipping detection and prediction.
+  const int clipped_level_step_;
+  const float clipped_ratio_threshold_;
+  const int clipped_wait_frames_;
+  const std::unique_ptr<ClippingPredictor> clipping_predictor_;
+  const bool use_clipping_predictor_step_;
+  int frames_since_clipped_;
+  int clipping_rate_log_counter_;
+  float clipping_rate_log_;
+
+  // Target range minimum and maximum. If the speech level is in the range
+  // [`target_range_min_dbfs`, `target_range_max_dbfs`], no volume adjustments
+  // take place. Instead, the digital gain controller is assumed to adapt to
+  // compensate for the speech level RMS error.
+  const int target_range_max_dbfs_;
+  const int target_range_min_dbfs_;
+
+  // Channel controllers updating the gain upwards/downwards.
+  std::vector<std::unique_ptr<MonoInputVolumeController>> channel_controllers_;
+  int channel_controlling_gain_ = 0;
+};
+
+// TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming
+// convention.
+class MonoInputVolumeController {
+ public:
+  MonoInputVolumeController(int min_input_volume_after_clipping,
+                            int min_input_volume,
+                            int update_input_volume_wait_frames,
+                            float speech_probability_threshold,
+                            float speech_ratio_threshold);
+  ~MonoInputVolumeController();
+  MonoInputVolumeController(const MonoInputVolumeController&) = delete;
+  MonoInputVolumeController& operator=(const MonoInputVolumeController&) =
+      delete;
+
+  void Initialize();
+  void HandleCaptureOutputUsedChange(bool capture_output_used);
+
+  // Sets the current input volume.
+  void set_stream_analog_level(int input_volume) {
+    recommended_input_volume_ = input_volume;
+  }
+
+  // Lowers the recommended input volume in response to clipping based on the
+  // suggested reduction `clipped_level_step`. Must be called after
+  // `set_stream_analog_level()`.
+  void HandleClipping(int clipped_level_step);
+
+  // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
+  // Adjusts the recommended input volume upwards/downwards depending on the
+  // result of `HandleClipping()` and on `rms_error_dbfs`. Updates are only
+  // allowed for active speech segments and when `rms_error_dbfs` is not empty.
+  // Must be called after `HandleClipping()`.
+  void Process(absl::optional<int> rms_error_dbfs, float speech_probability);
+
+  // Returns the recommended input volume. Must be called after `Process()`.
+  int recommended_analog_level() const { return recommended_input_volume_; }
+
+  void ActivateLogging() { log_to_histograms_ = true; }
+
+  int min_input_volume_after_clipping() const {
+    return min_input_volume_after_clipping_;
+  }
+
+  // Only used for testing.
+  int min_input_volume() const { return min_input_volume_; }
+
+ private:
+  // Sets a new input volume, after first checking that it hasn't been updated
+  // by the user, in which case no action is taken.
+  void SetInputVolume(int new_volume);
+
+  // Sets the maximum input volume that the input volume controller is allowed
+  // to apply. The volume must be at least `kClippedLevelMin`.
+  void SetMaxLevel(int level);
+
+  int CheckVolumeAndReset();
+
+  // Updates the recommended input volume. If the volume slider needs to be
+  // moved, we check first if the user has adjusted it, in which case we take
+  // no action and cache the updated level.
+  void UpdateInputVolume(int rms_error_dbfs);
+
+  const int min_input_volume_;
+  const int min_input_volume_after_clipping_;
+  int max_input_volume_;
+
+  int last_recommended_input_volume_ = 0;
+
+  bool capture_output_used_ = true;
+  bool check_volume_on_next_process_ = true;
+  bool startup_ = true;
+
+  // TODO(bugs.webrtc.org/7494): Create a separate member for the applied
+  // input volume.
+  // Recommended input volume. After `set_stream_analog_level()` is
+  // called, it holds the observed applied input volume. Possibly updated by
+  // `HandleClipping()` and `Process()`; after these calls, holds the
+  // recommended input volume.
+  int recommended_input_volume_ = 0;
+
+  bool log_to_histograms_ = false;
+
+  // Counters for frames and speech frames since the last update in the
+  // recommended input volume.
+  const int update_input_volume_wait_frames_;
+  int frames_since_update_input_volume_ = 0;
+  int speech_frames_since_update_input_volume_ = 0;
+  bool is_first_frame_ = true;
+
+  // Speech probability threshold for a frame to be considered speech (instead
+  // of silence). Limited to [0.0f, 1.0f].
+  const float speech_probability_threshold_;
+  // Minimum ratio of speech frames. Limited to [0.0f, 1.0f].
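+  // Roughly speaking, once `update_input_volume_wait_frames_` frames have
+  // been observed, an update is only applied if
+  //
+  //   speech_frames_since_update_input_volume_ >=
+  //       speech_ratio_threshold_ * update_input_volume_wait_frames_
+  //
+  // where a frame counts as speech when its probability is at or above
+  // `speech_probability_threshold_`. (A sketch of the gating inferred from
+  // the counters above, not a literal excerpt of the implementation.)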
+ const float speech_ratio_threshold_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_gn/moz.build new file mode 100644 index 0000000000..582eb326f3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_gn/moz.build @@ -0,0 +1,234 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + 
DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("input_volume_controller_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_unittest.cc new file mode 100644 index 0000000000..d1bdcf25a5 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_controller_unittest.cc @@ -0,0 +1,1857 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/input_volume_controller.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <fstream>
+#include <limits>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "rtc_base/numerics/safe_minmax.h"
+#include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/metrics.h"
+#include "test/field_trial.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+using ::testing::_;
+using ::testing::AtLeast;
+using ::testing::DoAll;
+using ::testing::Return;
+using ::testing::SetArgPointee;
+
+namespace webrtc {
+namespace {
+
+constexpr int kSampleRateHz = 32000;
+constexpr int kNumChannels = 1;
+constexpr int kInitialInputVolume = 128;
+constexpr int kClippedMin = 165;  // Arbitrary, but different from the default.
+constexpr float kAboveClippedThreshold = 0.2f;
+constexpr int kMinMicLevel = 20;
+constexpr int kClippedLevelStep = 15;
+constexpr float kClippedRatioThreshold = 0.1f;
+constexpr int kClippedWaitFrames = 300;
+constexpr float kHighSpeechProbability = 0.7f;
+constexpr float kLowSpeechProbability = 0.1f;
+constexpr float kSpeechLevel = -25.0f;
+constexpr float kSpeechProbabilityThreshold = 0.5f;
+constexpr float kSpeechRatioThreshold = 0.8f;
+
+constexpr float kMinSample = std::numeric_limits<int16_t>::min();
+constexpr float kMaxSample = std::numeric_limits<int16_t>::max();
+
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor;
+
+using InputVolumeControllerConfig = InputVolumeController::Config;
+
+constexpr ClippingPredictorConfig kDefaultClippingPredictorConfig{};
+
+std::unique_ptr<InputVolumeController> CreateInputVolumeController(
+    int clipped_level_step = kClippedLevelStep,
+    float clipped_ratio_threshold = kClippedRatioThreshold,
+    int clipped_wait_frames = kClippedWaitFrames,
+    bool enable_clipping_predictor = false,
+    int update_input_volume_wait_frames = 0) {
+  InputVolumeControllerConfig config{
+      .min_input_volume = kMinMicLevel,
+      .clipped_level_min = kClippedMin,
+      .clipped_level_step = clipped_level_step,
+      .clipped_ratio_threshold = clipped_ratio_threshold,
+      .clipped_wait_frames = clipped_wait_frames,
+      .enable_clipping_predictor = enable_clipping_predictor,
+      .target_range_max_dbfs = -18,
+      .target_range_min_dbfs = -30,
+      .update_input_volume_wait_frames = update_input_volume_wait_frames,
+      .speech_probability_threshold = kSpeechProbabilityThreshold,
+      .speech_ratio_threshold = kSpeechRatioThreshold,
+  };
+
+  return std::make_unique<InputVolumeController>(/*num_capture_channels=*/1,
+                                                 config);
+}
+
+// (Over)writes `samples_value` for the samples in `audio_buffer`.
+// When `clipped_ratio`, a value in [0, 1], is greater than 0, the corresponding
+// fraction of the frame is set to a full scale value to simulate clipping.
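+// For example, with `clipped_ratio` 0.25 and a 160-sample frame, the first
+// 40 samples of each channel are set to full scale (32767) and the remaining
+// 120 samples to `samples_value`.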
+void WriteAudioBufferSamples(float samples_value,
+                             float clipped_ratio,
+                             AudioBuffer& audio_buffer) {
+  RTC_DCHECK_GE(samples_value, kMinSample);
+  RTC_DCHECK_LE(samples_value, kMaxSample);
+  RTC_DCHECK_GE(clipped_ratio, 0.0f);
+  RTC_DCHECK_LE(clipped_ratio, 1.0f);
+  int num_channels = audio_buffer.num_channels();
+  int num_samples = audio_buffer.num_frames();
+  int num_clipping_samples = clipped_ratio * num_samples;
+  for (int ch = 0; ch < num_channels; ++ch) {
+    int i = 0;
+    for (; i < num_clipping_samples; ++i) {
+      audio_buffer.channels()[ch][i] = 32767.0f;
+    }
+    for (; i < num_samples; ++i) {
+      audio_buffer.channels()[ch][i] = samples_value;
+    }
+  }
+}
+
+// (Over)writes samples in `audio_buffer`. Alternates between `samples_value`
+// and zero.
+void WriteAlternatingAudioBufferSamples(float samples_value,
+                                        AudioBuffer& audio_buffer) {
+  RTC_DCHECK_GE(samples_value, kMinSample);
+  RTC_DCHECK_LE(samples_value, kMaxSample);
+  const int num_channels = audio_buffer.num_channels();
+  const int num_frames = audio_buffer.num_frames();
+  for (int ch = 0; ch < num_channels; ++ch) {
+    for (int i = 0; i < num_frames; i += 2) {
+      audio_buffer.channels()[ch][i] = samples_value;
+      audio_buffer.channels()[ch][i + 1] = 0.0f;
+    }
+  }
+}
+
+// Reads a given number of 10 ms chunks from a PCM file and feeds them to
+// `InputVolumeController`.
+class SpeechSamplesReader {
+ private:
+  // Recording properties.
+  static constexpr int kPcmSampleRateHz = 16000;
+  static constexpr int kPcmNumChannels = 1;
+  static constexpr int kPcmBytesPerSamples = sizeof(int16_t);
+
+ public:
+  SpeechSamplesReader()
+      : is_(test::ResourcePath("audio_processing/agc/agc_audio", "pcm"),
+            std::ios::binary | std::ios::ate),
+        audio_buffer_(kPcmSampleRateHz,
+                      kPcmNumChannels,
+                      kPcmSampleRateHz,
+                      kPcmNumChannels,
+                      kPcmSampleRateHz,
+                      kPcmNumChannels),
+        buffer_(audio_buffer_.num_frames()),
+        buffer_num_bytes_(buffer_.size() * kPcmBytesPerSamples) {
+    RTC_CHECK(is_);
+  }
+
+  // Reads `num_frames` 10 ms frames from the beginning of the PCM file,
+  // applies `gain_db` and feeds the frames into `controller` by calling
+  // `AnalyzeInputAudio()` and `RecommendInputVolume()` for each frame. Reads
+  // the number of 10 ms frames available in the PCM file if `num_frames` is
+  // too large - i.e., does not loop. `speech_probability` and
+  // `speech_level_dbfs` are passed to `RecommendInputVolume()`.
+  int Feed(int num_frames,
+           int applied_input_volume,
+           int gain_db,
+           float speech_probability,
+           absl::optional<float> speech_level_dbfs,
+           InputVolumeController& controller) {
+    RTC_DCHECK(controller.capture_output_used());
+
+    float gain = std::pow(10.0f, gain_db / 20.0f);  // From dB to linear gain.
+    is_.seekg(0, is_.beg);  // Start from the beginning of the PCM file.
+
+    // Read and feed frames.
+    for (int i = 0; i < num_frames; ++i) {
+      is_.read(reinterpret_cast<char*>(buffer_.data()), buffer_num_bytes_);
+      if (is_.gcount() < buffer_num_bytes_) {
+        // EOF reached. Stop.
+        break;
+      }
+      // Apply gain and copy samples into `audio_buffer_`.
+      std::transform(buffer_.begin(), buffer_.end(),
+                     audio_buffer_.channels()[0], [gain](int16_t v) -> float {
+                       return rtc::SafeClamp(static_cast<float>(v) * gain,
+                                             kMinSample, kMaxSample);
+                     });
+      controller.AnalyzeInputAudio(applied_input_volume, audio_buffer_);
+      const auto recommended_input_volume = controller.RecommendInputVolume(
+          speech_probability, speech_level_dbfs);
+
+      // Expect no errors: Applied volume set for every frame;
+      // `RecommendInputVolume()` returns a non-empty value.
+      EXPECT_TRUE(recommended_input_volume.has_value());
+
+      applied_input_volume = *recommended_input_volume;
+    }
+    return applied_input_volume;
+  }
+
+ private:
+  std::ifstream is_;
+  AudioBuffer audio_buffer_;
+  std::vector<int16_t> buffer_;
+  const std::streamsize buffer_num_bytes_;
+};
+
+// Runs the MonoInputVolumeController processing sequence following the API
+// contract. Returns the updated recommended input volume.
+int UpdateRecommendedInputVolume(MonoInputVolumeController& mono_controller,
+                                 int applied_input_volume,
+                                 float speech_probability,
+                                 absl::optional<int> rms_error_dbfs) {
+  mono_controller.set_stream_analog_level(applied_input_volume);
+  EXPECT_EQ(mono_controller.recommended_analog_level(), applied_input_volume);
+  mono_controller.Process(rms_error_dbfs, speech_probability);
+  return mono_controller.recommended_analog_level();
+}
+
+}  // namespace
+
+// TODO(bugs.webrtc.org/12874): Use constexpr struct with designated
+// initializers once fixed.
+constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() {
+  InputVolumeControllerConfig config{
+      .clipped_level_min = kClippedMin,
+      .clipped_level_step = kClippedLevelStep,
+      .clipped_ratio_threshold = kClippedRatioThreshold,
+      .clipped_wait_frames = kClippedWaitFrames,
+      .enable_clipping_predictor = kDefaultClippingPredictorConfig.enabled,
+      .target_range_max_dbfs = -18,
+      .target_range_min_dbfs = -30,
+      .update_input_volume_wait_frames = 0,
+      .speech_probability_threshold = 0.5f,
+      .speech_ratio_threshold = 1.0f,
+  };
+  return config;
+}
+
+// Helper class that provides an `InputVolumeController` instance with an
+// `AudioBuffer` instance and `CallAgcSequence()`, a helper method that runs
+// the `InputVolumeController` instance on the `AudioBuffer` instance by
+// sticking to the API contract.
+class InputVolumeControllerTestHelper {
+ public:
+  // Ctor. Initializes `audio_buffer` with zeros.
+  // TODO(bugs.webrtc.org/7494): Remove the default argument.
+  InputVolumeControllerTestHelper(const InputVolumeController::Config& config =
+                                      GetInputVolumeControllerTestConfig())
+      : audio_buffer(kSampleRateHz,
+                     kNumChannels,
+                     kSampleRateHz,
+                     kNumChannels,
+                     kSampleRateHz,
+                     kNumChannels),
+        controller(/*num_capture_channels=*/1, config) {
+    controller.Initialize();
+    WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f,
+                            audio_buffer);
+  }
+
+  // Calls the sequence of `InputVolumeController` methods according to the API
+  // contract, namely:
+  // - Sets the applied input volume;
+  // - Uses `audio_buffer` to call `AnalyzeInputAudio()` and
+  //   `RecommendInputVolume()`;
+  // Returns the recommended input volume.
+  absl::optional<int> CallAgcSequence(int applied_input_volume,
+                                      float speech_probability,
+                                      absl::optional<float> speech_level_dbfs,
+                                      int num_calls = 1) {
+    RTC_DCHECK_GE(num_calls, 1);
+    absl::optional<int> volume = applied_input_volume;
+    for (int i = 0; i < num_calls; ++i) {
+      // Repeat the initial volume if `RecommendInputVolume()` doesn't return a
+      // value.
+      controller.AnalyzeInputAudio(volume.value_or(applied_input_volume),
+                                   audio_buffer);
+      volume = controller.RecommendInputVolume(speech_probability,
+                                               speech_level_dbfs);
+
+      // Allow deviation from the API contract: `RecommendInputVolume()`
+      // doesn't necessarily return a recommended input volume.
+      if (volume.has_value()) {
+        EXPECT_EQ(*volume, controller.recommended_input_volume());
+      }
+    }
+    return volume;
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`.
+  int CallRecommendInputVolume(int num_calls,
+                               int initial_volume,
+                               float speech_probability,
+                               absl::optional<float> speech_level_dbfs) {
+    RTC_DCHECK(controller.capture_output_used());
+
+    // Create non-clipping audio for `AnalyzeInputAudio()`.
+    WriteAlternatingAudioBufferSamples(0.1f * kMaxSample, audio_buffer);
+    int volume = initial_volume;
+    for (int i = 0; i < num_calls; ++i) {
+      controller.AnalyzeInputAudio(volume, audio_buffer);
+      const auto recommended_input_volume = controller.RecommendInputVolume(
+          speech_probability, speech_level_dbfs);
+
+      // Expect no errors: Applied volume set for every frame;
+      // `RecommendInputVolume()` returns a non-empty value.
+      EXPECT_TRUE(recommended_input_volume.has_value());
+
+      volume = *recommended_input_volume;
+    }
+    return volume;
+  }
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use
+  // `CallAgcSequence()`.
+  void CallAnalyzeInputAudio(int num_calls, float clipped_ratio) {
+    RTC_DCHECK(controller.capture_output_used());
+
+    RTC_DCHECK_GE(clipped_ratio, 0.0f);
+    RTC_DCHECK_LE(clipped_ratio, 1.0f);
+    WriteAudioBufferSamples(/*samples_value=*/0.0f, clipped_ratio,
+                            audio_buffer);
+    for (int i = 0; i < num_calls; ++i) {
+      controller.AnalyzeInputAudio(controller.recommended_input_volume(),
+                                   audio_buffer);
+    }
+  }
+
+  AudioBuffer audio_buffer;
+  InputVolumeController controller;
+};
+
+class InputVolumeControllerChannelSampleRateTest
+    : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+  int GetNumChannels() const { return std::get<0>(GetParam()); }
+  int GetSampleRateHz() const { return std::get<1>(GetParam()); }
+};
+
+TEST_P(InputVolumeControllerChannelSampleRateTest, CheckIsAlive) {
+  const int num_channels = GetNumChannels();
+  const int sample_rate_hz = GetSampleRateHz();
+
+  constexpr InputVolumeController::Config kConfig{.enable_clipping_predictor =
+                                                      true};
+  InputVolumeController controller(num_channels, kConfig);
+  controller.Initialize();
+  AudioBuffer buffer(sample_rate_hz, num_channels, sample_rate_hz,
+                     num_channels, sample_rate_hz, num_channels);
+
+  constexpr int kStartupVolume = 100;
+  int applied_initial_volume = kStartupVolume;
+
+  // Trigger a downward adaptation with clipping.
+  constexpr int kLevelWithinTargetDbfs =
+      (kConfig.target_range_min_dbfs + kConfig.target_range_max_dbfs) / 2;
+  WriteAlternatingAudioBufferSamples(/*samples_value=*/kMaxSample, buffer);
+  const int initial_volume1 = applied_initial_volume;
+  for (int i = 0; i < 400; ++i) {
+    controller.AnalyzeInputAudio(applied_initial_volume, buffer);
+    auto recommended_input_volume = controller.RecommendInputVolume(
+        kLowSpeechProbability,
+        /*speech_level_dbfs=*/kLevelWithinTargetDbfs);
+    ASSERT_TRUE(recommended_input_volume.has_value());
+    applied_initial_volume = *recommended_input_volume;
+  }
+  ASSERT_LT(controller.recommended_input_volume(), initial_volume1);
+
+  // Fill in audio that does not clip.
+  WriteAlternatingAudioBufferSamples(/*samples_value=*/1234.5f, buffer);
+
+  // Trigger an upward adaptation.
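+  // The speech level passed below is 5 dB under `target_range_min_dbfs`,
+  // i.e., below the target range, so the recommended volume should rise.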
+  const int initial_volume2 = controller.recommended_input_volume();
+  for (int i = 0; i < kConfig.clipped_wait_frames; ++i) {
+    controller.AnalyzeInputAudio(applied_initial_volume, buffer);
+    auto recommended_input_volume = controller.RecommendInputVolume(
+        kHighSpeechProbability,
+        /*speech_level_dbfs=*/kConfig.target_range_min_dbfs - 5);
+    ASSERT_TRUE(recommended_input_volume.has_value());
+    applied_initial_volume = *recommended_input_volume;
+  }
+  EXPECT_GT(controller.recommended_input_volume(), initial_volume2);
+
+  // Trigger a downward adaptation.
+  const int initial_volume = controller.recommended_input_volume();
+  for (int i = 0; i < kConfig.update_input_volume_wait_frames; ++i) {
+    controller.AnalyzeInputAudio(applied_initial_volume, buffer);
+    auto recommended_input_volume = controller.RecommendInputVolume(
+        kHighSpeechProbability,
+        /*speech_level_dbfs=*/kConfig.target_range_max_dbfs + 5);
+    ASSERT_TRUE(recommended_input_volume.has_value());
+    applied_initial_volume = *recommended_input_volume;
+  }
+  EXPECT_LT(controller.recommended_input_volume(), initial_volume);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    ,
+    InputVolumeControllerChannelSampleRateTest,
+    ::testing::Combine(::testing::Values(1, 2, 3, 6),
+                       ::testing::Values(8000, 16000, 32000, 48000)));
+
+class InputVolumeControllerParametrizedTest
+    : public ::testing::TestWithParam<int> {};
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       StartupMinVolumeConfigurationRespectedWhenAppliedInputVolumeAboveMin) {
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = GetParam()});
+
+  EXPECT_EQ(*helper.CallAgcSequence(/*applied_input_volume=*/128,
+                                    /*speech_probability=*/0.9f,
+                                    /*speech_level_dbfs=*/-80),
+            128);
+}
+
+TEST_P(
+    InputVolumeControllerParametrizedTest,
+    StartupMinVolumeConfigurationRespectedWhenAppliedInputVolumeMaybeBelowMin) {
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = GetParam()});
+
+  EXPECT_GE(*helper.CallAgcSequence(/*applied_input_volume=*/10,
+                                    /*speech_probability=*/0.9f,
+                                    /*speech_level_dbfs=*/-80),
+            10);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       StartupMinVolumeRespectedWhenAppliedVolumeNonZero) {
+  const int kMinInputVolume = GetParam();
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = kMinInputVolume,
+                  .target_range_min_dbfs = -30,
+                  .update_input_volume_wait_frames = 1,
+                  .speech_probability_threshold = 0.5f,
+                  .speech_ratio_threshold = 0.5f});
+
+  // Volume change possible; speech level below the digital gain window.
+  int volume = *helper.CallAgcSequence(/*applied_input_volume=*/1,
+                                       /*speech_probability=*/0.9f,
+                                       /*speech_level_dbfs=*/-80);
+
+  EXPECT_EQ(volume, kMinInputVolume);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       MinVolumeRepeatedlyRespectedWhenAppliedVolumeNonZero) {
+  const int kMinInputVolume = GetParam();
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = kMinInputVolume,
+                  .target_range_min_dbfs = -30,
+                  .update_input_volume_wait_frames = 1,
+                  .speech_probability_threshold = 0.5f,
+                  .speech_ratio_threshold = 0.5f});
+
+  // Volume change possible; speech level below the digital gain window.
+  for (int i = 0; i < 100; ++i) {
+    const int volume = *helper.CallAgcSequence(/*applied_input_volume=*/1,
+                                               /*speech_probability=*/0.9f,
+                                               /*speech_level_dbfs=*/-80);
+    EXPECT_GE(volume, kMinInputVolume);
+  }
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       StartupMinVolumeRespectedOnceWhenAppliedVolumeZero) {
+  const int kMinInputVolume = GetParam();
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = kMinInputVolume,
+                  .target_range_min_dbfs = -30,
+                  .update_input_volume_wait_frames = 1,
+                  .speech_probability_threshold = 0.5f,
+                  .speech_ratio_threshold = 0.5f});
+
+  int volume = *helper.CallAgcSequence(/*applied_input_volume=*/0,
+                                       /*speech_probability=*/0.9f,
+                                       /*speech_level_dbfs=*/-80);
+
+  EXPECT_EQ(volume, kMinInputVolume);
+
+  // No change of volume regardless of a speech level below the digital gain
+  // window; applied volume is zero.
+  volume = *helper.CallAgcSequence(/*applied_input_volume=*/0,
+                                   /*speech_probability=*/0.9f,
+                                   /*speech_level_dbfs=*/-80);
+
+  EXPECT_EQ(volume, 0);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, MicVolumeResponseToRmsError) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  int volume = *helper.CallAgcSequence(kInitialInputVolume,
+                                       kHighSpeechProbability, kSpeechLevel);
+
+  // Inside the digital gain's window; no change of volume.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -23.0f);
+
+  // Inside the digital gain's window; no change of volume.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -28.0f);
+
+  // Above the digital gain's window; volume should be increased.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -29.0f);
+  EXPECT_EQ(volume, 128);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -38.0f);
+  EXPECT_EQ(volume, 156);
+
+  // Inside the digital gain's window; no change of volume.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -23.0f);
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -18.0f);
+
+  // Below the digital gain's window; volume should be decreased.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -17.0f);
+  EXPECT_EQ(volume, 155);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -17.0f);
+  EXPECT_EQ(volume, 151);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -9.0f);
+  EXPECT_EQ(volume, 119);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, MicVolumeIsLimited) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  const int min_input_volume = GetParam();
+  config.min_input_volume = min_input_volume;
+  InputVolumeControllerTestHelper helper(config);
+  int volume = *helper.CallAgcSequence(kInitialInputVolume,
+                                       kHighSpeechProbability, kSpeechLevel);
+
+  // Maximum upwards change is limited.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -48.0f);
+  EXPECT_EQ(volume, 183);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -48.0f);
+  EXPECT_EQ(volume, 243);
+
+  // Won't go higher than the maximum.
+ volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(volume, 255); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -17.0f); + EXPECT_EQ(volume, 254); + + // Maximum downwards change is limited. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 194); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 137); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 88); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 54); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, 33); + + // Won't go lower than the minimum. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, std::max(18, min_input_volume)); + + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, 22.0f); + EXPECT_EQ(volume, std::max(12, min_input_volume)); +} + +TEST_P(InputVolumeControllerParametrizedTest, NoActionWhileMuted) { + InputVolumeControllerTestHelper helper_1( + /*config=*/{.min_input_volume = GetParam()}); + InputVolumeControllerTestHelper helper_2( + /*config=*/{.min_input_volume = GetParam()}); + + int volume_1 = *helper_1.CallAgcSequence(/*applied_input_volume=*/255, + kHighSpeechProbability, kSpeechLevel, + /*num_calls=*/1); + int volume_2 = *helper_2.CallAgcSequence(/*applied_input_volume=*/255, + kHighSpeechProbability, kSpeechLevel, + /*num_calls=*/1); + + EXPECT_EQ(volume_1, 255); + EXPECT_EQ(volume_2, 255); + + helper_2.controller.HandleCaptureOutputUsedChange(false); + + WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); + + volume_1 = + *helper_1.CallAgcSequence(volume_1, kHighSpeechProbability, kSpeechLevel, + /*num_calls=*/1); + volume_2 = + *helper_2.CallAgcSequence(volume_2, kHighSpeechProbability, kSpeechLevel, + /*num_calls=*/1); + + EXPECT_LT(volume_1, 255); + EXPECT_EQ(volume_2, 255); +} + +TEST_P(InputVolumeControllerParametrizedTest, + UnmutingChecksVolumeWithoutRaising) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, + kSpeechLevel); + + helper.controller.HandleCaptureOutputUsedChange(false); + helper.controller.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 127; + + // SetMicVolume should not be called. 
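+  // Unmuting only triggers a volume check: since 127 is above the configured
+  // minimum, the recommendation equals the applied input volume.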
+ EXPECT_EQ( + helper.CallRecommendInputVolume(/*num_calls=*/1, kInputVolume, + kHighSpeechProbability, kSpeechLevel), + kInputVolume); +} + +TEST_P(InputVolumeControllerParametrizedTest, UnmutingRaisesTooLowVolume) { + const int min_input_volume = GetParam(); + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = min_input_volume}); + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, + kSpeechLevel); + + helper.controller.HandleCaptureOutputUsedChange(false); + helper.controller.HandleCaptureOutputUsedChange(true); + + constexpr int kInputVolume = 11; + + EXPECT_EQ( + helper.CallRecommendInputVolume(/*num_calls=*/1, kInputVolume, + kHighSpeechProbability, kSpeechLevel), + min_input_volume); +} + +TEST_P(InputVolumeControllerParametrizedTest, + ManualLevelChangeResultsInNoSetMicCall) { + InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); + config.min_input_volume = GetParam(); + InputVolumeControllerTestHelper helper(config); + int volume = *helper.CallAgcSequence(kInitialInputVolume, + kHighSpeechProbability, kSpeechLevel); + + // GetMicVolume returns a value outside of the quantization slack, indicating + // a manual volume change. + ASSERT_NE(volume, 154); + volume = helper.CallRecommendInputVolume( + /*num_calls=*/1, /*initial_volume=*/154, kHighSpeechProbability, -29.0f); + EXPECT_EQ(volume, 154); + + // Do the same thing, except downwards now. + volume = helper.CallRecommendInputVolume( + /*num_calls=*/1, /*initial_volume=*/100, kHighSpeechProbability, -17.0f); + EXPECT_EQ(volume, 100); + + // And finally verify the AGC continues working without a manual change. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -17.0f); + EXPECT_EQ(volume, 99); +} + +TEST_P(InputVolumeControllerParametrizedTest, + RecoveryAfterManualLevelChangeFromMax) { + InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); + config.min_input_volume = GetParam(); + InputVolumeControllerTestHelper helper(config); + int volume = *helper.CallAgcSequence(kInitialInputVolume, + kHighSpeechProbability, kSpeechLevel); + + // Force the mic up to max volume. Takes a few steps due to the residual + // gain limitation. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(volume, 183); + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(volume, 243); + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(volume, 255); + + // Manual change does not result in SetMicVolume call. + volume = helper.CallRecommendInputVolume( + /*num_calls=*/1, /*initial_volume=*/50, kHighSpeechProbability, -17.0f); + EXPECT_EQ(helper.controller.recommended_input_volume(), 50); + + // Continues working as usual afterwards. + volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume, + kHighSpeechProbability, -38.0f); + + EXPECT_EQ(volume, 65); +} + +// Checks that the minimum input volume is enforced during the upward adjustment +// of the input volume. 
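+// The volume is first pinned at the minimum by a manual change to 1; it is
+// only raised above the minimum after repeatedly observing speech levels
+// below the target range.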
+TEST_P(InputVolumeControllerParametrizedTest,
+       EnforceMinInputVolumeDuringUpwardsAdjustment) {
+  const int min_input_volume = GetParam();
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.min_input_volume = min_input_volume;
+  InputVolumeControllerTestHelper helper(config);
+  int volume = *helper.CallAgcSequence(kInitialInputVolume,
+                                       kHighSpeechProbability, kSpeechLevel);
+
+  // Manual change below min, but strictly positive, otherwise no action will
+  // be taken.
+  volume = helper.CallRecommendInputVolume(
+      /*num_calls=*/1, /*initial_volume=*/1, kHighSpeechProbability, -17.0f);
+
+  // Trigger an upward adjustment of the input volume.
+  EXPECT_EQ(volume, min_input_volume);
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -29.0f);
+  EXPECT_EQ(volume, min_input_volume);
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -30.0f);
+  EXPECT_EQ(volume, min_input_volume);
+
+  // After a number of consistently low speech level observations, the input
+  // volume is eventually raised above the minimum.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/10, volume,
+                                           kHighSpeechProbability, -38.0f);
+  EXPECT_GT(volume, min_input_volume);
+}
+
+// Checks that, when a minimum input volume is specified, the AGC immediately
+// applies it after the input volume is manually set below it.
+TEST_P(InputVolumeControllerParametrizedTest,
+       RecoveryAfterManualLevelChangeBelowMin) {
+  const int min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = min_input_volume});
+  int volume = *helper.CallAgcSequence(kInitialInputVolume,
+                                       kHighSpeechProbability, kSpeechLevel);
+
+  // Manual change below min, but strictly positive, otherwise
+  // AGC won't take any action.
+ volume = helper.CallRecommendInputVolume( + /*num_calls=*/1, /*initial_volume=*/1, kHighSpeechProbability, -17.0f); + EXPECT_EQ(volume, min_input_volume); +} + +TEST_P(InputVolumeControllerParametrizedTest, NoClippingHasNoImpact) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/100, /*clipped_ratio=*/0); + EXPECT_EQ(helper.controller.recommended_input_volume(), 128); +} + +TEST_P(InputVolumeControllerParametrizedTest, + ClippingUnderThresholdHasNoImpact) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, /*clipped_ratio=*/0.099); + EXPECT_EQ(helper.controller.recommended_input_volume(), 128); +} + +TEST_P(InputVolumeControllerParametrizedTest, ClippingLowersVolume) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, /*clipped_ratio=*/0.2); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); +} + +TEST_P(InputVolumeControllerParametrizedTest, + WaitingPeriodBetweenClippingChecks) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); + + helper.CallAnalyzeInputAudio(/*num_calls=*/300, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), 225); +} + +TEST_P(InputVolumeControllerParametrizedTest, ClippingLoweringIsLimited) { + InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); + config.min_input_volume = GetParam(); + InputVolumeControllerTestHelper helper(config); + helper.CallAgcSequence(/*applied_input_volume=*/180, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), kClippedMin); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1000, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), kClippedMin); +} + +TEST_P(InputVolumeControllerParametrizedTest, + ClippingMaxIsRespectedWhenEqualToLevel) { + InputVolumeControllerTestHelper helper( + /*config=*/{.min_input_volume = GetParam()}); + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, + kSpeechLevel); + + helper.CallAnalyzeInputAudio(/*num_calls=*/1, + /*clipped_ratio=*/kAboveClippedThreshold); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); + + helper.CallRecommendInputVolume(/*num_calls=*/10, /*initial_volume=*/240, + kHighSpeechProbability, -48.0f); + EXPECT_EQ(helper.controller.recommended_input_volume(), 240); +} + +TEST_P(InputVolumeControllerParametrizedTest, + ClippingMaxIsRespectedWhenHigherThanLevel) { + InputVolumeControllerConfig config = 
GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  helper.CallAgcSequence(/*applied_input_volume=*/200, kHighSpeechProbability,
+                         kSpeechLevel);
+
+  helper.CallAnalyzeInputAudio(/*num_calls=*/1,
+                               /*clipped_ratio=*/kAboveClippedThreshold);
+  int volume = helper.controller.recommended_input_volume();
+  EXPECT_EQ(volume, 185);
+
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -58.0f);
+  EXPECT_EQ(volume, 240);
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/10, volume,
+                                           kHighSpeechProbability, -58.0f);
+  EXPECT_EQ(volume, 240);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, UserCanRaiseVolumeAfterClipping) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  helper.CallAgcSequence(/*applied_input_volume=*/225, kHighSpeechProbability,
+                         kSpeechLevel);
+
+  helper.CallAnalyzeInputAudio(/*num_calls=*/1,
+                               /*clipped_ratio=*/kAboveClippedThreshold);
+  EXPECT_EQ(helper.controller.recommended_input_volume(), 210);
+
+  // User changed the volume.
+  int volume = helper.CallRecommendInputVolume(
+      /*num_calls=*/1, /*initial_volume=*/250, kHighSpeechProbability, -32.0f);
+  EXPECT_EQ(volume, 250);
+
+  // Move down...
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -8.0f);
+  EXPECT_EQ(volume, 210);
+  // And back up to the new max established by the user.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -58.0f);
+  EXPECT_EQ(volume, 250);
+  // Will not move above new maximum.
+  volume = helper.CallRecommendInputVolume(/*num_calls=*/1, volume,
+                                           kHighSpeechProbability, -48.0f);
+  EXPECT_EQ(volume, 250);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       ClippingDoesNotPullLowVolumeBackUp) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  helper.CallAgcSequence(/*applied_input_volume=*/80, kHighSpeechProbability,
+                         kSpeechLevel);
+
+  int initial_volume = helper.controller.recommended_input_volume();
+  helper.CallAnalyzeInputAudio(/*num_calls=*/1,
+                               /*clipped_ratio=*/kAboveClippedThreshold);
+  EXPECT_EQ(helper.controller.recommended_input_volume(), initial_volume);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, TakesNoActionOnZeroMicVolume) {
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = GetParam()});
+  helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability,
+                         kSpeechLevel);
+
+  EXPECT_EQ(
+      helper.CallRecommendInputVolume(/*num_calls=*/10, /*initial_volume=*/0,
+                                      kHighSpeechProbability, -48.0f),
+      0);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest, ClippingDetectionLowersVolume) {
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.min_input_volume = GetParam();
+  InputVolumeControllerTestHelper helper(config);
+  int volume = *helper.CallAgcSequence(/*applied_input_volume=*/255,
+                                       kHighSpeechProbability, kSpeechLevel,
+                                       /*num_calls=*/1);
+
+  EXPECT_EQ(volume, 255);
+
+  WriteAlternatingAudioBufferSamples(0.99f * kMaxSample, helper.audio_buffer);
+  volume = *helper.CallAgcSequence(volume, kHighSpeechProbability, kSpeechLevel,
+                                   /*num_calls=*/100);
+
+  EXPECT_EQ(volume, 255);
+
+  WriteAlternatingAudioBufferSamples(kMaxSample, helper.audio_buffer);
+  volume =
*helper.CallAgcSequence(volume, kHighSpeechProbability, kSpeechLevel,
+                              /*num_calls=*/100);
+
+  EXPECT_EQ(volume, 240);
+}
+
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_level_step`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_ratio_threshold`.
+// TODO(bugs.webrtc.org/12774): Test the behavior of `clipped_wait_frames`.
+// Verifies that configurable clipping parameters are initialized as intended.
+TEST_P(InputVolumeControllerParametrizedTest, ClippingParametersVerified) {
+  std::unique_ptr<InputVolumeController> controller =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames);
+  controller->Initialize();
+  EXPECT_EQ(controller->clipped_level_step_, kClippedLevelStep);
+  EXPECT_EQ(controller->clipped_ratio_threshold_, kClippedRatioThreshold);
+  EXPECT_EQ(controller->clipped_wait_frames_, kClippedWaitFrames);
+  std::unique_ptr<InputVolumeController> controller_custom =
+      CreateInputVolumeController(/*clipped_level_step=*/10,
+                                  /*clipped_ratio_threshold=*/0.2f,
+                                  /*clipped_wait_frames=*/50);
+  controller_custom->Initialize();
+  EXPECT_EQ(controller_custom->clipped_level_step_, 10);
+  EXPECT_EQ(controller_custom->clipped_ratio_threshold_, 0.2f);
+  EXPECT_EQ(controller_custom->clipped_wait_frames_, 50);
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       DisableClippingPredictorDisablesClippingPredictor) {
+  std::unique_ptr<InputVolumeController> controller =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false);
+  controller->Initialize();
+
+  EXPECT_FALSE(controller->clipping_predictor_enabled());
+  EXPECT_FALSE(controller->use_clipping_predictor_step());
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       EnableClippingPredictorEnablesClippingPredictor) {
+  std::unique_ptr<InputVolumeController> controller =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/true);
+  controller->Initialize();
+
+  EXPECT_TRUE(controller->clipping_predictor_enabled());
+  EXPECT_TRUE(controller->use_clipping_predictor_step());
+}
+
+TEST_P(InputVolumeControllerParametrizedTest,
+       DisableClippingPredictorDoesNotLowerVolume) {
+  int volume = 255;
+  InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig();
+  config.enable_clipping_predictor = false;
+  auto helper = InputVolumeControllerTestHelper(config);
+  helper.controller.Initialize();
+
+  EXPECT_FALSE(helper.controller.clipping_predictor_enabled());
+  EXPECT_FALSE(helper.controller.use_clipping_predictor_step());
+
+  // Expect no volume change, since clipping prediction is disabled.
+  for (int j = 0; j < 31; ++j) {
+    WriteAlternatingAudioBufferSamples(0.99f * kMaxSample, helper.audio_buffer);
+    volume =
+        *helper.CallAgcSequence(volume, kLowSpeechProbability, kSpeechLevel,
+                                /*num_calls=*/5);
+
+    WriteAudioBufferSamples(0.99f * kMaxSample, /*clipped_ratio=*/0.0f,
+                            helper.audio_buffer);
+    volume =
+        *helper.CallAgcSequence(volume, kLowSpeechProbability, kSpeechLevel,
+                                /*num_calls=*/5);
+
+    EXPECT_EQ(volume, 255);
+  }
+}
+
+// TODO(bugs.webrtc.org/7494): Split into several smaller tests.
+TEST_P(InputVolumeControllerParametrizedTest,
+       UsedClippingPredictionsProduceLowerAnalogLevels) {
+  constexpr int kInitialLevel = 255;
+  constexpr float kCloseToClippingPeakRatio = 0.99f;
+  int volume_1 = kInitialLevel;
+  int volume_2 = kInitialLevel;
+
+  // Create two helpers, one with clipping prediction and one without.
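+  // Below, both are fed the same near-clipping input; only the
+  // predictor-enabled controller (`helper_1`) is expected to lower the volume
+  // before actual clipping occurs.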
+ auto config_1 = GetInputVolumeControllerTestConfig(); + auto config_2 = GetInputVolumeControllerTestConfig(); + config_1.enable_clipping_predictor = true; + config_2.enable_clipping_predictor = false; + auto helper_1 = InputVolumeControllerTestHelper(config_1); + auto helper_2 = InputVolumeControllerTestHelper(config_2); + helper_1.controller.Initialize(); + helper_2.controller.Initialize(); + + EXPECT_TRUE(helper_1.controller.clipping_predictor_enabled()); + EXPECT_FALSE(helper_2.controller.clipping_predictor_enabled()); + EXPECT_TRUE(helper_1.controller.use_clipping_predictor_step()); + + // Expect a change if clipping prediction is enabled. + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_1.audio_buffer); + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + EXPECT_EQ(volume_1, kInitialLevel - kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel); + + // Expect no change during waiting. + for (int i = 0; i < kClippedWaitFrames / 10; ++i) { + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_1.audio_buffer); + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + EXPECT_EQ(volume_1, kInitialLevel - kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel); + } + + // Expect a change when the prediction step is used. 
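+  // After the wait period, another predicted-clipping event lowers `volume_1`
+  // by a second `kClippedLevelStep`; `volume_2` stays at the initial level.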
+ WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_1.audio_buffer); + WriteAudioBufferSamples(kCloseToClippingPeakRatio * kMaxSample, + /*clipped_ratio=*/0.0f, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + EXPECT_EQ(volume_1, kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel); + + // Expect no change when clipping is not detected or predicted. + for (int i = 0; i < 2 * kClippedWaitFrames / 10; ++i) { + WriteAlternatingAudioBufferSamples(/*samples_value=*/0.0f, + helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(/*samples_value=*/0.0f, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f, + helper_1.audio_buffer); + WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipped_ratio=*/0.0f, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + } + + EXPECT_EQ(volume_1, kInitialLevel - 2 * kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel); + + // Expect a change for clipping frames. + WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 1); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 1); + + EXPECT_EQ(volume_1, kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel - kClippedLevelStep); + + // Expect no change during waiting. + for (int i = 0; i < kClippedWaitFrames / 10; ++i) { + WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer); + WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + + WriteAudioBufferSamples(kMaxSample, /*clipped_ratio=*/1.0f, + helper_1.audio_buffer); + WriteAudioBufferSamples(kMaxSample, /*clipped_ratio=*/1.0f, + helper_2.audio_buffer); + volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability, + kSpeechLevel, 5); + volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability, + kSpeechLevel, 5); + } + + EXPECT_EQ(volume_1, kInitialLevel - 3 * kClippedLevelStep); + EXPECT_EQ(volume_2, kInitialLevel - kClippedLevelStep); + + // Expect a change for clipping frames. 
+  WriteAlternatingAudioBufferSamples(kMaxSample, helper_1.audio_buffer);
+  WriteAlternatingAudioBufferSamples(kMaxSample, helper_2.audio_buffer);
+  volume_1 = *helper_1.CallAgcSequence(volume_1, kLowSpeechProbability,
+                                       kSpeechLevel, 1);
+  volume_2 = *helper_2.CallAgcSequence(volume_2, kLowSpeechProbability,
+                                       kSpeechLevel, 1);
+
+  EXPECT_EQ(volume_1, kInitialLevel - 4 * kClippedLevelStep);
+  EXPECT_EQ(volume_2, kInitialLevel - 2 * kClippedLevelStep);
+}
+
+// Checks that passing an empty speech level has no effect on the input volume.
+TEST_P(InputVolumeControllerParametrizedTest, EmptyRmsErrorHasNoEffect) {
+  InputVolumeController controller(kNumChannels,
+                                   GetInputVolumeControllerTestConfig());
+  controller.Initialize();
+
+  // Feed speech with low energy that would trigger an upward adaptation of
+  // the analog level if the speech probability were not low and the speech
+  // level not empty.
+  constexpr int kNumFrames = 125;
+  constexpr int kGainDb = -20;
+  SpeechSamplesReader reader;
+  int volume = reader.Feed(kNumFrames, kInitialInputVolume, kGainDb,
+                           kLowSpeechProbability, absl::nullopt, controller);
+
+  // Check that no adaptation occurs.
+  ASSERT_EQ(volume, kInitialInputVolume);
+}
+
+// Checks that the recommended input volume is not updated unless enough
+// frames have been processed after the previous update.
+TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) {
+  constexpr int kInputVolume = kInitialInputVolume;
+  std::unique_ptr<InputVolumeController> controller_wait_0 =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false,
+                                  /*update_input_volume_wait_frames=*/0);
+  std::unique_ptr<InputVolumeController> controller_wait_100 =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false,
+                                  /*update_input_volume_wait_frames=*/100);
+  controller_wait_0->Initialize();
+  controller_wait_100->Initialize();
+
+  SpeechSamplesReader reader_1;
+  SpeechSamplesReader reader_2;
+  int volume_wait_0 = reader_1.Feed(
+      /*num_frames=*/99, kInputVolume, /*gain_db=*/0, kHighSpeechProbability,
+      /*speech_level_dbfs=*/-42.0f, *controller_wait_0);
+  int volume_wait_100 = reader_2.Feed(
+      /*num_frames=*/99, kInputVolume, /*gain_db=*/0, kHighSpeechProbability,
+      /*speech_level_dbfs=*/-42.0f, *controller_wait_100);
+
+  // Check that adaptation only occurs if enough frames have been processed.
+  ASSERT_GT(volume_wait_0, kInputVolume);
+  ASSERT_EQ(volume_wait_100, kInputVolume);
+
+  volume_wait_0 =
+      reader_1.Feed(/*num_frames=*/1, volume_wait_0,
+                    /*gain_db=*/0, kHighSpeechProbability,
+                    /*speech_level_dbfs=*/-42.0f, *controller_wait_0);
+  volume_wait_100 =
+      reader_2.Feed(/*num_frames=*/1, volume_wait_100,
+                    /*gain_db=*/0, kHighSpeechProbability,
+                    /*speech_level_dbfs=*/-42.0f, *controller_wait_100);
+
+  // Check that adaptation only occurs when enough frames have been processed.
+  ASSERT_GT(volume_wait_0, kInputVolume);
+  ASSERT_GT(volume_wait_100, kInputVolume);
+}
+
+INSTANTIATE_TEST_SUITE_P(,
+                         InputVolumeControllerParametrizedTest,
+                         ::testing::Values(12, 20));
+
+TEST(InputVolumeControllerTest,
+     MinInputVolumeEnforcedWithClippingWhenAboveClippedLevelMin) {
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = 80, .clipped_level_min = 70});
+
+  // Trigger a downward adjustment caused by clipping input. Use a low speech
+  // probability to limit the volume changes to clipping handling.
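+  // Repeated clipping lowers the volume in `clipped_level_step` decrements;
+  // the descent stops at `min_input_volume` (80), which here lies above
+  // `clipped_level_min` (70).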
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          helper.audio_buffer);
+  constexpr int kNumCalls = 800;
+  helper.CallAgcSequence(/*applied_input_volume=*/100, kLowSpeechProbability,
+                         /*speech_level_dbfs=*/-18.0f, kNumCalls);
+
+  EXPECT_EQ(helper.controller.recommended_input_volume(), 80);
+}
+
+TEST(InputVolumeControllerTest,
+     ClippedLevelMinEnforcedWithClippingWhenAboveMinInputVolume) {
+  InputVolumeControllerTestHelper helper(
+      /*config=*/{.min_input_volume = 70, .clipped_level_min = 80});
+
+  // Trigger a downward adjustment caused by clipping input. Use a low speech
+  // probability to limit the volume changes to clipping handling.
+  WriteAudioBufferSamples(/*samples_value=*/4000.0f, /*clipped_ratio=*/0.8f,
+                          helper.audio_buffer);
+  constexpr int kNumCalls = 800;
+  helper.CallAgcSequence(/*applied_input_volume=*/100, kLowSpeechProbability,
+                         /*speech_level_dbfs=*/-18.0f, kNumCalls);
+
+  EXPECT_EQ(helper.controller.recommended_input_volume(), 80);
+}
+
+TEST(InputVolumeControllerTest, SpeechRatioThresholdIsEffective) {
+  constexpr int kInputVolume = kInitialInputVolume;
+  // Create two input volume controllers with 10 frames between volume
+  // updates, a minimum speech ratio of 0.8 and a speech probability
+  // threshold of 0.5.
+  std::unique_ptr<InputVolumeController> controller_1 =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false,
+                                  /*update_input_volume_wait_frames=*/10);
+  std::unique_ptr<InputVolumeController> controller_2 =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false,
+                                  /*update_input_volume_wait_frames=*/10);
+  controller_1->Initialize();
+  controller_2->Initialize();
+
+  SpeechSamplesReader reader_1;
+  SpeechSamplesReader reader_2;
+
+  int volume_1 = reader_1.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0,
+                               /*speech_probability=*/0.7f,
+                               /*speech_level_dbfs=*/-42.0f, *controller_1);
+  int volume_2 = reader_2.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0,
+                               /*speech_probability=*/0.4f,
+                               /*speech_level_dbfs=*/-42.0f, *controller_2);
+
+  ASSERT_EQ(volume_1, kInputVolume);
+  ASSERT_EQ(volume_2, kInputVolume);
+
+  volume_1 = reader_1.Feed(/*num_frames=*/2, volume_1, /*gain_db=*/0,
+                           /*speech_probability=*/0.4f,
+                           /*speech_level_dbfs=*/-42.0f, *controller_1);
+  volume_2 = reader_2.Feed(/*num_frames=*/2, volume_2, /*gain_db=*/0,
+                           /*speech_probability=*/0.4f,
+                           /*speech_level_dbfs=*/-42.0f, *controller_2);
+
+  ASSERT_EQ(volume_1, kInputVolume);
+  ASSERT_EQ(volume_2, kInputVolume);
+
+  volume_1 = reader_1.Feed(
+      /*num_frames=*/7, volume_1, /*gain_db=*/0,
+      /*speech_probability=*/0.7f, /*speech_level_dbfs=*/-42.0f, *controller_1);
+  volume_2 = reader_2.Feed(
+      /*num_frames=*/7, volume_2, /*gain_db=*/0,
+      /*speech_probability=*/0.7f, /*speech_level_dbfs=*/-42.0f, *controller_2);
+
+  ASSERT_GT(volume_1, kInputVolume);
+  ASSERT_EQ(volume_2, kInputVolume);
+}
+
+TEST(InputVolumeControllerTest, SpeechProbabilityThresholdIsEffective) {
+  constexpr int kInputVolume = kInitialInputVolume;
+  // Create two input volume controllers with the exact same settings and
+  // 10 frames between volume updates.
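+  // The two controllers are fed the same audio; only the speech probability
+  // differs, straddling the 0.5 threshold, so only `controller_1` is expected
+  // to adapt.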
+  std::unique_ptr<InputVolumeController> controller_1 =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false,
+                                  /*update_input_volume_wait_frames=*/10);
+  std::unique_ptr<InputVolumeController> controller_2 =
+      CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
+                                  kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false,
+                                  /*update_input_volume_wait_frames=*/10);
+  controller_1->Initialize();
+  controller_2->Initialize();
+
+  SpeechSamplesReader reader_1;
+  SpeechSamplesReader reader_2;
+
+  // Process with two sets of inputs: Use `reader_1` to process inputs
+  // that cause the volume to be adjusted after enough frames have been
+  // processed and `reader_2` to process inputs that do not cause the volume
+  // to be adjusted.
+  int volume_1 = reader_1.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0,
+                               /*speech_probability=*/0.5f,
+                               /*speech_level_dbfs=*/-42.0f, *controller_1);
+  int volume_2 = reader_2.Feed(/*num_frames=*/1, kInputVolume, /*gain_db=*/0,
+                               /*speech_probability=*/0.49f,
+                               /*speech_level_dbfs=*/-42.0f, *controller_2);
+
+  ASSERT_EQ(volume_1, kInputVolume);
+  ASSERT_EQ(volume_2, kInputVolume);
+
+  reader_1.Feed(/*num_frames=*/2, volume_1, /*gain_db=*/0,
+                /*speech_probability=*/0.49f, /*speech_level_dbfs=*/-42.0f,
+                *controller_1);
+  reader_2.Feed(/*num_frames=*/2, volume_2, /*gain_db=*/0,
+                /*speech_probability=*/0.49f, /*speech_level_dbfs=*/-42.0f,
+                *controller_2);
+
+  ASSERT_EQ(volume_1, kInputVolume);
+  ASSERT_EQ(volume_2, kInputVolume);
+
+  volume_1 = reader_1.Feed(
+      /*num_frames=*/7, volume_1, /*gain_db=*/0,
+      /*speech_probability=*/0.5f, /*speech_level_dbfs=*/-42.0f, *controller_1);
+  volume_2 = reader_2.Feed(
+      /*num_frames=*/7, volume_2, /*gain_db=*/0,
+      /*speech_probability=*/0.5f, /*speech_level_dbfs=*/-42.0f, *controller_2);
+
+  ASSERT_GT(volume_1, kInputVolume);
+  ASSERT_EQ(volume_2, kInputVolume);
+}
+
+TEST(InputVolumeControllerTest,
+     DoNotLogRecommendedInputVolumeOnChangeToMatchTarget) {
+  metrics::Reset();
+
+  SpeechSamplesReader reader;
+  auto controller = CreateInputVolumeController();
+  controller->Initialize();
+  // Trigger a downward volume change by inputting audio that clips. Pass a
+  // speech level that falls in the target range to make sure that the
+  // adaptation is due to clipping and not to target range matching.
+  constexpr int kStartupVolume = 255;
+  const int volume = reader.Feed(/*num_frames=*/14, kStartupVolume,
+                                 /*gain_db=*/50, kHighSpeechProbability,
+                                 /*speech_level_dbfs=*/-20.0f, *controller);
+  ASSERT_LT(volume, kStartupVolume);
+  EXPECT_METRIC_THAT(
+      metrics::Samples(
+          "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"),
+      ::testing::IsEmpty());
+}
+
+TEST(InputVolumeControllerTest,
+     LogRecommendedInputVolumeOnUpwardChangeToMatchTarget) {
+  metrics::Reset();
+
+  SpeechSamplesReader reader;
+  auto controller = CreateInputVolumeController();
+  controller->Initialize();
+  constexpr int kStartupVolume = 100;
+  // Trigger an upward volume change by inputting audio that does not clip and
+  // by passing a speech level below the target range.
+ const int volume = reader.Feed(/*num_frames=*/14, kStartupVolume, + /*gain_db=*/-6, kHighSpeechProbability, + /*speech_level_dbfs=*/-50.0f, *controller); + ASSERT_GT(volume, kStartupVolume); + EXPECT_METRIC_THAT( + metrics::Samples( + "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"), + ::testing::Not(::testing::IsEmpty())); +} + +TEST(InputVolumeControllerTest, + LogRecommendedInputVolumeOnDownwardChangeToMatchTarget) { + metrics::Reset(); + + SpeechSamplesReader reader; + auto controller = CreateInputVolumeController(); + controller->Initialize(); + constexpr int kStartupVolume = 100; + // Trigger a downward volume change by inputting audio that does not clip and + // by passing a speech level above the target range. + const int volume = reader.Feed(/*num_frames=*/14, kStartupVolume, + /*gain_db=*/-6, kHighSpeechProbability, + /*speech_level_dbfs=*/-5.0f, *controller); + ASSERT_LT(volume, kStartupVolume); + EXPECT_METRIC_THAT( + metrics::Samples( + "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget"), + ::testing::Not(::testing::IsEmpty())); +} + +TEST(MonoInputVolumeControllerTest, CheckHandleClippingLowersVolume) { + constexpr int kInitialInputVolume = 100; + constexpr int kInputVolumeStep = 29; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/70, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + UpdateRecommendedInputVolume(mono_controller, kInitialInputVolume, + kLowSpeechProbability, + /*rms_error_dbfs*/ -10.0f); + + mono_controller.HandleClipping(kInputVolumeStep); + + EXPECT_EQ(mono_controller.recommended_analog_level(), + kInitialInputVolume - kInputVolumeStep); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessNegativeRmsErrorDecreasesInputVolume) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, -10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, -10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume, kInitialInputVolume); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessPositiveRmsErrorIncreasesInputVolume) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, 10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, 10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, 10.0f); + + EXPECT_GT(volume, kInitialInputVolume); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessNegativeRmsErrorDecreasesInputVolumeWithLimit) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + 
MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_3( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, + /*speech_probability_threshold=*/0.7, + /*speech_ratio_threshold=*/0.8); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + mono_controller_3.Initialize(); + + // Process RMS errors in the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -14.0f); + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -14.0f); + // Process RMS errors outside the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -15.0f); + int volume_3 = UpdateRecommendedInputVolume( + mono_controller_3, kInitialInputVolume, kHighSpeechProbability, -30.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -15.0f); + volume_3 = UpdateRecommendedInputVolume(mono_controller_3, volume_3, + kHighSpeechProbability, -30.0f); + + EXPECT_LT(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); + EXPECT_EQ(volume_2, volume_3); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessPositiveRmsErrorIncreasesInputVolumeWithLimit) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_3( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + mono_controller_3.Initialize(); + + // Process RMS errors in the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, 14.0f); + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, 14.0f); + // Process RMS errors outside the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. 
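+  // +15 dB and +30 dB both exceed the per-update limit and are clamped to
+  // the same maximum step, which is why `volume_2` and `volume_3` are
+  // expected to match below.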
+ int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, 15.0f); + int volume_3 = UpdateRecommendedInputVolume( + mono_controller_3, kInitialInputVolume, kHighSpeechProbability, 30.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, 15.0f); + volume_3 = UpdateRecommendedInputVolume(mono_controller_3, volume_3, + kHighSpeechProbability, 30.0f); + + EXPECT_GT(volume_1, kInitialInputVolume); + EXPECT_GT(volume_2, volume_1); + EXPECT_EQ(volume_2, volume_3); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessRmsErrorDecreasesInputVolumeRepeatedly) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume_before = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, -10.0f); + volume_before = UpdateRecommendedInputVolume(mono_controller, volume_before, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_before, kInitialInputVolume); + + int volume_after = UpdateRecommendedInputVolume( + mono_controller, volume_before, kHighSpeechProbability, -10.0f); + volume_after = UpdateRecommendedInputVolume(mono_controller, volume_after, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_after, volume_before); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessPositiveRmsErrorIncreasesInputVolumeRepeatedly) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume_before = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, 10.0f); + volume_before = UpdateRecommendedInputVolume(mono_controller, volume_before, + kHighSpeechProbability, 10.0f); + + EXPECT_GT(volume_before, kInitialInputVolume); + + int volume_after = UpdateRecommendedInputVolume( + mono_controller, volume_before, kHighSpeechProbability, 10.0f); + volume_after = UpdateRecommendedInputVolume(mono_controller, volume_after, + kHighSpeechProbability, 10.0f); + + EXPECT_GT(volume_after, volume_before); +} + +TEST(MonoInputVolumeControllerTest, CheckClippedLevelMinIsEffective) { + constexpr int kInitialInputVolume = 100; + constexpr int kClippedLevelMin = 70; + MonoInputVolumeController mono_controller_1( + kClippedLevelMin, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + kClippedLevelMin, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + // Process one frame to reset the state for `HandleClipping()`. 
+ EXPECT_EQ(UpdateRecommendedInputVolume(mono_controller_1, kInitialInputVolume, + kLowSpeechProbability, -10.0f), + kInitialInputVolume); + EXPECT_EQ(UpdateRecommendedInputVolume(mono_controller_2, kInitialInputVolume, + kLowSpeechProbability, -10.0f), + kInitialInputVolume); + + mono_controller_1.HandleClipping(29); + mono_controller_2.HandleClipping(31); + + EXPECT_EQ(mono_controller_2.recommended_analog_level(), kClippedLevelMin); + EXPECT_LT(mono_controller_2.recommended_analog_level(), + mono_controller_1.recommended_analog_level()); +} + +TEST(MonoInputVolumeControllerTest, CheckMinMicLevelIsEffective) { + constexpr int kInitialInputVolume = 100; + constexpr int kMinMicLevel = 64; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, kMinMicLevel, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, kMinMicLevel, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -30.0f); + + EXPECT_LT(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); + EXPECT_EQ(volume_2, kMinMicLevel); +} + +TEST(MonoInputVolumeControllerTest, + CheckUpdateInputVolumeWaitFramesIsEffective) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/1, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_2, kInitialInputVolume); +} + +TEST(MonoInputVolumeControllerTest, + CheckSpeechProbabilityThresholdIsEffective) { + constexpr int kInitialInputVolume = 100; + constexpr float kSpeechProbabilityThreshold = 0.8f; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kSpeechProbabilityThreshold, + kSpeechRatioThreshold); + 
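+  // A second controller with an identical configuration; it will keep
+  // receiving speech probabilities at the threshold while the first one
+  // drops below it.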
MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kSpeechProbabilityThreshold, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = + UpdateRecommendedInputVolume(mono_controller_1, kInitialInputVolume, + kSpeechProbabilityThreshold, -10.0f); + int volume_2 = + UpdateRecommendedInputVolume(mono_controller_2, kInitialInputVolume, + kSpeechProbabilityThreshold, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, volume_1, kSpeechProbabilityThreshold - 0.1f, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kSpeechProbabilityThreshold, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); +} + +TEST(MonoInputVolumeControllerTest, CheckSpeechRatioThresholdIsEffective) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/4, kHighSpeechProbability, + /*speech_ratio_threshold=*/0.75f); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/4, kHighSpeechProbability, + /*speech_ratio_threshold=*/0.75f); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kLowSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kLowSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kLowSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessEmptyRmsErrorDoesNotLowerVolume) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, 
volume_1, kHighSpeechProbability, absl::nullopt);
+  volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2,
+                                          kHighSpeechProbability, -10.0f);
+
+  EXPECT_EQ(volume_1, kInitialInputVolume);
+  EXPECT_LT(volume_2, volume_1);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc
new file mode 100644
index 0000000000..05624b1f92
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc
@@ -0,0 +1,171 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/input_volume_stats_reporter.h"
+
+#include <cmath>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+
+using InputVolumeType = InputVolumeStatsReporter::InputVolumeType;
+
+constexpr int kFramesIn60Seconds = 6000;
+constexpr int kMinInputVolume = 0;
+constexpr int kMaxInputVolume = 255;
+constexpr int kMaxUpdate = kMaxInputVolume - kMinInputVolume;
+
+int ComputeAverageUpdate(int sum_updates, int num_updates) {
+  RTC_DCHECK_GE(sum_updates, 0);
+  RTC_DCHECK_LE(sum_updates, kMaxUpdate * kFramesIn60Seconds);
+  RTC_DCHECK_GE(num_updates, 0);
+  RTC_DCHECK_LE(num_updates, kFramesIn60Seconds);
+  if (num_updates == 0) {
+    return 0;
+  }
+  return std::round(static_cast<float>(sum_updates) /
+                    static_cast<float>(num_updates));
+}
+
+constexpr absl::string_view MetricNamePrefix(
+    InputVolumeType input_volume_type) {
+  switch (input_volume_type) {
+    case InputVolumeType::kApplied:
+      return "WebRTC.Audio.Apm.AppliedInputVolume.";
+    case InputVolumeType::kRecommended:
+      return "WebRTC.Audio.Apm.RecommendedInputVolume.";
+  }
+}
+
+metrics::Histogram* CreateVolumeHistogram(InputVolumeType input_volume_type) {
+  char buffer[64];
+  rtc::SimpleStringBuilder builder(buffer);
+  builder << MetricNamePrefix(input_volume_type) << "OnChange";
+  return metrics::HistogramFactoryGetCountsLinear(/*name=*/builder.str(),
+                                                  /*min=*/1,
+                                                  /*max=*/kMaxInputVolume,
+                                                  /*bucket_count=*/50);
+}
+
+metrics::Histogram* CreateRateHistogram(InputVolumeType input_volume_type,
+                                        absl::string_view name) {
+  char buffer[64];
+  rtc::SimpleStringBuilder builder(buffer);
+  builder << MetricNamePrefix(input_volume_type) << name;
+  return metrics::HistogramFactoryGetCountsLinear(/*name=*/builder.str(),
+                                                  /*min=*/1,
+                                                  /*max=*/kFramesIn60Seconds,
+                                                  /*bucket_count=*/50);
+}
+
+metrics::Histogram* CreateAverageHistogram(InputVolumeType input_volume_type,
+                                           absl::string_view name) {
+  char buffer[64];
+  rtc::SimpleStringBuilder builder(buffer);
+  builder << MetricNamePrefix(input_volume_type) << name;
+  return metrics::HistogramFactoryGetCountsLinear(/*name=*/builder.str(),
+                                                  /*min=*/1,
+                                                  /*max=*/kMaxUpdate,
+                                                  /*bucket_count=*/50);
+}
+
+}  // namespace
+
+InputVolumeStatsReporter::InputVolumeStatsReporter(InputVolumeType type)
+    : histograms_(
+          {.on_volume_change = CreateVolumeHistogram(type),
+           .decrease_rate =
CreateRateHistogram(type, "DecreaseRate"), + .decrease_average = CreateAverageHistogram(type, "DecreaseAverage"), + .increase_rate = CreateRateHistogram(type, "IncreaseRate"), + .increase_average = CreateAverageHistogram(type, "IncreaseAverage"), + .update_rate = CreateRateHistogram(type, "UpdateRate"), + .update_average = CreateAverageHistogram(type, "UpdateAverage")}), + cannot_log_stats_(!histograms_.AllPointersSet()) { + if (cannot_log_stats_) { + RTC_LOG(LS_WARNING) << "Will not log any `" << MetricNamePrefix(type) + << "*` histogram stats."; + } +} + +InputVolumeStatsReporter::~InputVolumeStatsReporter() = default; + +void InputVolumeStatsReporter::UpdateStatistics(int input_volume) { + if (cannot_log_stats_) { + // Since the stats cannot be logged, do not bother updating them. + return; + } + + RTC_DCHECK_GE(input_volume, kMinInputVolume); + RTC_DCHECK_LE(input_volume, kMaxInputVolume); + if (previous_input_volume_.has_value() && + input_volume != previous_input_volume_.value()) { + // Update stats when the input volume changes. + metrics::HistogramAdd(histograms_.on_volume_change, input_volume); + // Update stats that are periodically logged. + const int volume_change = input_volume - previous_input_volume_.value(); + if (volume_change < 0) { + ++volume_update_stats_.num_decreases; + volume_update_stats_.sum_decreases -= volume_change; + } else { + ++volume_update_stats_.num_increases; + volume_update_stats_.sum_increases += volume_change; + } + } + // Periodically log input volume change metrics. + if (++log_volume_update_stats_counter_ >= kFramesIn60Seconds) { + LogVolumeUpdateStats(); + volume_update_stats_ = {}; + log_volume_update_stats_counter_ = 0; + } + previous_input_volume_ = input_volume; +} + +void InputVolumeStatsReporter::LogVolumeUpdateStats() const { + // Decrease rate and average. + metrics::HistogramAdd(histograms_.decrease_rate, + volume_update_stats_.num_decreases); + if (volume_update_stats_.num_decreases > 0) { + int average_decrease = ComputeAverageUpdate( + volume_update_stats_.sum_decreases, volume_update_stats_.num_decreases); + metrics::HistogramAdd(histograms_.decrease_average, average_decrease); + } + // Increase rate and average. + metrics::HistogramAdd(histograms_.increase_rate, + volume_update_stats_.num_increases); + if (volume_update_stats_.num_increases > 0) { + int average_increase = ComputeAverageUpdate( + volume_update_stats_.sum_increases, volume_update_stats_.num_increases); + metrics::HistogramAdd(histograms_.increase_average, average_increase); + } + // Update rate and average. 
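+  // For instance (hypothetical numbers), 10 decreases summing to 30 plus 5
+  // increases summing to 20 yield an update rate of 15 and an average update
+  // of round(50 / 15) = 3.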
+  int num_updates =
+      volume_update_stats_.num_decreases + volume_update_stats_.num_increases;
+  metrics::HistogramAdd(histograms_.update_rate, num_updates);
+  if (num_updates > 0) {
+    int average_update = ComputeAverageUpdate(
+        volume_update_stats_.sum_decreases + volume_update_stats_.sum_increases,
+        num_updates);
+    metrics::HistogramAdd(histograms_.update_average, average_update);
+  }
+}
+
+void UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(int volume) {
+  RTC_HISTOGRAM_COUNTS_LINEAR(
+      "WebRTC.Audio.Apm.RecommendedInputVolume.OnChangeToMatchTarget", volume,
+      1, kMaxInputVolume, 50);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.h b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.h
new file mode 100644
index 0000000000..31b110031c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.h
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_STATS_REPORTER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_STATS_REPORTER_H_
+
+#include "absl/types/optional.h"
+#include "rtc_base/gtest_prod_util.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+// Input volume statistics calculator. Computes aggregate stats based on the
+// framewise input volume observed by `UpdateStatistics()`. Periodically logs
+// the statistics into a histogram.
+class InputVolumeStatsReporter {
+ public:
+  enum class InputVolumeType {
+    kApplied = 0,
+    kRecommended = 1,
+  };
+
+  explicit InputVolumeStatsReporter(InputVolumeType input_volume_type);
+  InputVolumeStatsReporter(const InputVolumeStatsReporter&) = delete;
+  InputVolumeStatsReporter operator=(const InputVolumeStatsReporter&) = delete;
+  ~InputVolumeStatsReporter();
+
+  // Updates the stats based on `input_volume`. Periodically logs the stats
+  // into a histogram.
+  void UpdateStatistics(int input_volume);
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest,
+                           CheckVolumeUpdateStatsForEmptyStats);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest,
+                           CheckVolumeUpdateStatsAfterNoVolumeChange);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest,
+                           CheckVolumeUpdateStatsAfterVolumeIncrease);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest,
+                           CheckVolumeUpdateStatsAfterVolumeDecrease);
+  FRIEND_TEST_ALL_PREFIXES(InputVolumeStatsReporterTest,
+                           CheckVolumeUpdateStatsAfterReset);
+
+  // Stores input volume update stats to enable calculation of update rate and
+  // average update separately for volume increases and decreases.
+  struct VolumeUpdateStats {
+    int num_decreases = 0;
+    int num_increases = 0;
+    int sum_decreases = 0;
+    int sum_increases = 0;
+  } volume_update_stats_;
+
+  // Returns a copy of the stored statistics. Use only for testing.
+  VolumeUpdateStats volume_update_stats() const { return volume_update_stats_; }
+
+  // Computes aggregate stats and logs them into a histogram.
+  void LogVolumeUpdateStats() const;
+
+  // Histograms.
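+  // Created once at construction; if any histogram factory call fails, all
+  // stat logging is disabled via `cannot_log_stats_`.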
+  struct Histograms {
+    metrics::Histogram* const on_volume_change;
+    metrics::Histogram* const decrease_rate;
+    metrics::Histogram* const decrease_average;
+    metrics::Histogram* const increase_rate;
+    metrics::Histogram* const increase_average;
+    metrics::Histogram* const update_rate;
+    metrics::Histogram* const update_average;
+    bool AllPointersSet() const {
+      return !!on_volume_change && !!decrease_rate && !!decrease_average &&
+             !!increase_rate && !!increase_average && !!update_rate &&
+             !!update_average;
+    }
+  } histograms_;
+
+  // True if the stats cannot be logged.
+  const bool cannot_log_stats_;
+
+  int log_volume_update_stats_counter_ = 0;
+  absl::optional<int> previous_input_volume_ = absl::nullopt;
+};
+
+// Updates the histogram that keeps track of recommended input volume changes
+// required in order to match the target level in the input volume adaptation
+// process.
+void UpdateHistogramOnRecommendedInputVolumeChangeToMatchTarget(int volume);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_STATS_REPORTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_gn/moz.build
new file mode 100644
index 0000000000..5a662f4079
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_gn/moz.build
@@ -0,0 +1,225 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] ==
"Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += 
[
+        "-msse2"
+    ]
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+Library("input_volume_stats_reporter_gn")
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_unittest.cc
new file mode 100644
index 0000000000..e762c1fb59
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/input_volume_stats_reporter_unittest.cc
@@ -0,0 +1,246 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/input_volume_stats_reporter.h"
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/metrics.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace {
+
+using InputVolumeType = InputVolumeStatsReporter::InputVolumeType;
+
+constexpr int kFramesIn60Seconds = 6000;
+
+constexpr absl::string_view kLabelPrefix = "WebRTC.Audio.Apm.";
+
+class InputVolumeStatsReporterTest
+    : public ::testing::TestWithParam<InputVolumeType> {
+ public:
+  InputVolumeStatsReporterTest() { metrics::Reset(); }
+
+ protected:
+  InputVolumeType InputVolumeType() const { return GetParam(); }
+  std::string VolumeLabel() const {
+    return (rtc::StringBuilder(kLabelPrefix) << VolumeTypeLabel() << "OnChange")
+        .str();
+  }
+  std::string DecreaseRateLabel() const {
+    return (rtc::StringBuilder(kLabelPrefix)
+            << VolumeTypeLabel() << "DecreaseRate")
+        .str();
+  }
+  std::string DecreaseAverageLabel() const {
+    return (rtc::StringBuilder(kLabelPrefix)
+            << VolumeTypeLabel() << "DecreaseAverage")
+        .str();
+  }
+  std::string IncreaseRateLabel() const {
+    return (rtc::StringBuilder(kLabelPrefix)
+            << VolumeTypeLabel() << "IncreaseRate")
+        .str();
+  }
+  std::string IncreaseAverageLabel() const {
+    return (rtc::StringBuilder(kLabelPrefix)
+            << VolumeTypeLabel() << "IncreaseAverage")
+        .str();
+  }
+  std::string UpdateRateLabel() const {
+    return (rtc::StringBuilder(kLabelPrefix)
+            << VolumeTypeLabel() << "UpdateRate")
+        .str();
+  }
+  std::string UpdateAverageLabel() const {
+    return (rtc::StringBuilder(kLabelPrefix)
+            << VolumeTypeLabel() << "UpdateAverage")
+        .str();
+  }
+
+ private:
+  absl::string_view VolumeTypeLabel() const {
+    switch (InputVolumeType()) {
+      case InputVolumeType::kApplied:
+        return "AppliedInputVolume.";
+      case InputVolumeType::kRecommended:
+        return "RecommendedInputVolume.";
+    }
+  }
+};
+
+TEST_P(InputVolumeStatsReporterTest, CheckVolumeOnChangeIsEmpty) {
+  InputVolumeStatsReporter stats_reporter(InputVolumeType());
+  stats_reporter.UpdateStatistics(10);
+  EXPECT_METRIC_THAT(metrics::Samples(VolumeLabel()), ::testing::ElementsAre());
+}
+
+TEST_P(InputVolumeStatsReporterTest, CheckRateAverageStatsEmpty) {
+  InputVolumeStatsReporter stats_reporter(InputVolumeType());
+  constexpr int kInputVolume = 10;
+  stats_reporter.UpdateStatistics(kInputVolume);
+  // Update almost until the periodic logging and reset.
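+  // The loop adds `kFramesIn60Seconds - 2` updates which, together with the
+  // initial call, stays one update short of the 60-second window, so nothing
+  // is logged yet.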
+ for (int i = 0; i < kFramesIn60Seconds - 2; i += 2) { + stats_reporter.UpdateStatistics(kInputVolume + 2); + stats_reporter.UpdateStatistics(kInputVolume); + } + EXPECT_METRIC_THAT(metrics::Samples(UpdateRateLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(DecreaseRateLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(IncreaseRateLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(UpdateAverageLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(DecreaseAverageLabel()), + ::testing::ElementsAre()); + EXPECT_METRIC_THAT(metrics::Samples(IncreaseAverageLabel()), + ::testing::ElementsAre()); +} + +TEST_P(InputVolumeStatsReporterTest, CheckSamples) { + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + + constexpr int kInputVolume1 = 10; + stats_reporter.UpdateStatistics(kInputVolume1); + // Update until periodic logging. + constexpr int kInputVolume2 = 12; + for (int i = 0; i < kFramesIn60Seconds; i += 2) { + stats_reporter.UpdateStatistics(kInputVolume2); + stats_reporter.UpdateStatistics(kInputVolume1); + } + // Update until periodic logging. + constexpr int kInputVolume3 = 13; + for (int i = 0; i < kFramesIn60Seconds; i += 2) { + stats_reporter.UpdateStatistics(kInputVolume3); + stats_reporter.UpdateStatistics(kInputVolume1); + } + + // Check volume changes stats. + EXPECT_METRIC_THAT( + metrics::Samples(VolumeLabel()), + ::testing::ElementsAre( + ::testing::Pair(kInputVolume1, kFramesIn60Seconds), + ::testing::Pair(kInputVolume2, kFramesIn60Seconds / 2), + ::testing::Pair(kInputVolume3, kFramesIn60Seconds / 2))); + + // Check volume change rate stats. + EXPECT_METRIC_THAT( + metrics::Samples(UpdateRateLabel()), + ::testing::ElementsAre(::testing::Pair(kFramesIn60Seconds - 1, 1), + ::testing::Pair(kFramesIn60Seconds, 1))); + EXPECT_METRIC_THAT( + metrics::Samples(DecreaseRateLabel()), + ::testing::ElementsAre(::testing::Pair(kFramesIn60Seconds / 2 - 1, 1), + ::testing::Pair(kFramesIn60Seconds / 2, 1))); + EXPECT_METRIC_THAT( + metrics::Samples(IncreaseRateLabel()), + ::testing::ElementsAre(::testing::Pair(kFramesIn60Seconds / 2, 2))); + + // Check volume change average stats. 
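+  // The first window only sees +/-2 changes while the second is dominated by
+  // +/-3 changes, hence the expected averages of 2 and 3.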
+ EXPECT_METRIC_THAT( + metrics::Samples(UpdateAverageLabel()), + ::testing::ElementsAre(::testing::Pair(2, 1), ::testing::Pair(3, 1))); + EXPECT_METRIC_THAT( + metrics::Samples(DecreaseAverageLabel()), + ::testing::ElementsAre(::testing::Pair(2, 1), ::testing::Pair(3, 1))); + EXPECT_METRIC_THAT( + metrics::Samples(IncreaseAverageLabel()), + ::testing::ElementsAre(::testing::Pair(2, 1), ::testing::Pair(3, 1))); +} +} // namespace + +TEST_P(InputVolumeStatsReporterTest, CheckVolumeUpdateStatsForEmptyStats) { + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + const auto& update_stats = stats_reporter.volume_update_stats(); + EXPECT_EQ(update_stats.num_decreases, 0); + EXPECT_EQ(update_stats.sum_decreases, 0); + EXPECT_EQ(update_stats.num_increases, 0); + EXPECT_EQ(update_stats.sum_increases, 0); +} + +TEST_P(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterNoVolumeChange) { + constexpr int kInputVolume = 10; + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume); + const auto& update_stats = stats_reporter.volume_update_stats(); + EXPECT_EQ(update_stats.num_decreases, 0); + EXPECT_EQ(update_stats.sum_decreases, 0); + EXPECT_EQ(update_stats.num_increases, 0); + EXPECT_EQ(update_stats.sum_increases, 0); +} + +TEST_P(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterVolumeIncrease) { + constexpr int kInputVolume = 10; + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume + 4); + stats_reporter.UpdateStatistics(kInputVolume + 5); + const auto& update_stats = stats_reporter.volume_update_stats(); + EXPECT_EQ(update_stats.num_decreases, 0); + EXPECT_EQ(update_stats.sum_decreases, 0); + EXPECT_EQ(update_stats.num_increases, 2); + EXPECT_EQ(update_stats.sum_increases, 5); +} + +TEST_P(InputVolumeStatsReporterTest, + CheckVolumeUpdateStatsAfterVolumeDecrease) { + constexpr int kInputVolume = 10; + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume - 4); + stats_reporter.UpdateStatistics(kInputVolume - 5); + const auto& stats_update = stats_reporter.volume_update_stats(); + EXPECT_EQ(stats_update.num_decreases, 2); + EXPECT_EQ(stats_update.sum_decreases, 5); + EXPECT_EQ(stats_update.num_increases, 0); + EXPECT_EQ(stats_update.sum_increases, 0); +} + +TEST_P(InputVolumeStatsReporterTest, CheckVolumeUpdateStatsAfterReset) { + InputVolumeStatsReporter stats_reporter(InputVolumeType()); + constexpr int kInputVolume = 10; + stats_reporter.UpdateStatistics(kInputVolume); + // Update until the periodic reset. 
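+  // As above, stay one update short of the reset so that the accumulated
+  // stats can be inspected before a further update triggers the reset.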
+ for (int i = 0; i < kFramesIn60Seconds - 2; i += 2) { + stats_reporter.UpdateStatistics(kInputVolume + 2); + stats_reporter.UpdateStatistics(kInputVolume); + } + const auto& stats_before_reset = stats_reporter.volume_update_stats(); + EXPECT_EQ(stats_before_reset.num_decreases, kFramesIn60Seconds / 2 - 1); + EXPECT_EQ(stats_before_reset.sum_decreases, kFramesIn60Seconds - 2); + EXPECT_EQ(stats_before_reset.num_increases, kFramesIn60Seconds / 2 - 1); + EXPECT_EQ(stats_before_reset.sum_increases, kFramesIn60Seconds - 2); + stats_reporter.UpdateStatistics(kInputVolume + 2); + const auto& stats_during_reset = stats_reporter.volume_update_stats(); + EXPECT_EQ(stats_during_reset.num_decreases, 0); + EXPECT_EQ(stats_during_reset.sum_decreases, 0); + EXPECT_EQ(stats_during_reset.num_increases, 0); + EXPECT_EQ(stats_during_reset.sum_increases, 0); + stats_reporter.UpdateStatistics(kInputVolume); + stats_reporter.UpdateStatistics(kInputVolume + 3); + const auto& stats_after_reset = stats_reporter.volume_update_stats(); + EXPECT_EQ(stats_after_reset.num_decreases, 1); + EXPECT_EQ(stats_after_reset.sum_decreases, 2); + EXPECT_EQ(stats_after_reset.num_increases, 1); + EXPECT_EQ(stats_after_reset.sum_increases, 3); +} + +INSTANTIATE_TEST_SUITE_P(, + InputVolumeStatsReporterTest, + ::testing::Values(InputVolumeType::kApplied, + InputVolumeType::kRecommended)); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc new file mode 100644 index 0000000000..bb6e038514 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/interpolated_gain_curve.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/strings/string_builder.h"
+
+namespace webrtc {
+
+constexpr std::array<float, kInterpolatedGainCurveTotalPoints>
+    InterpolatedGainCurve::approximation_params_x_;
+
+constexpr std::array<float, kInterpolatedGainCurveTotalPoints>
+    InterpolatedGainCurve::approximation_params_m_;
+
+constexpr std::array<float, kInterpolatedGainCurveTotalPoints>
+    InterpolatedGainCurve::approximation_params_q_;
+
+InterpolatedGainCurve::InterpolatedGainCurve(
+    ApmDataDumper* apm_data_dumper,
+    absl::string_view histogram_name_prefix)
+    : region_logger_(
+          (rtc::StringBuilder("WebRTC.Audio.")
+           << histogram_name_prefix << ".FixedDigitalGainCurveRegion.Identity")
+              .str(),
+          (rtc::StringBuilder("WebRTC.Audio.")
+           << histogram_name_prefix << ".FixedDigitalGainCurveRegion.Knee")
+              .str(),
+          (rtc::StringBuilder("WebRTC.Audio.")
+           << histogram_name_prefix << ".FixedDigitalGainCurveRegion.Limiter")
+              .str(),
+          (rtc::StringBuilder("WebRTC.Audio.")
+           << histogram_name_prefix
+           << ".FixedDigitalGainCurveRegion.Saturation")
+              .str()),
+      apm_data_dumper_(apm_data_dumper) {}
+
+InterpolatedGainCurve::~InterpolatedGainCurve() {
+  if (stats_.available) {
+    RTC_DCHECK(apm_data_dumper_);
+    apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_identity",
+                              stats_.look_ups_identity_region);
+    apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_knee",
+                              stats_.look_ups_knee_region);
+    apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_limiter",
+                              stats_.look_ups_limiter_region);
+    apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_saturation",
+                              stats_.look_ups_saturation_region);
+    region_logger_.LogRegionStats(stats_);
+  }
+}
+
+InterpolatedGainCurve::RegionLogger::RegionLogger(
+    absl::string_view identity_histogram_name,
+    absl::string_view knee_histogram_name,
+    absl::string_view limiter_histogram_name,
+    absl::string_view saturation_histogram_name)
+    : identity_histogram(
+          metrics::HistogramFactoryGetCounts(identity_histogram_name,
+                                             1,
+                                             10000,
+                                             50)),
+      knee_histogram(metrics::HistogramFactoryGetCounts(knee_histogram_name,
+                                                        1,
+                                                        10000,
+                                                        50)),
+      limiter_histogram(
+          metrics::HistogramFactoryGetCounts(limiter_histogram_name,
+                                             1,
+                                             10000,
+                                             50)),
+      saturation_histogram(
+          metrics::HistogramFactoryGetCounts(saturation_histogram_name,
+                                             1,
+                                             10000,
+                                             50)) {}
+
+InterpolatedGainCurve::RegionLogger::~RegionLogger() = default;
+
+void InterpolatedGainCurve::RegionLogger::LogRegionStats(
+    const InterpolatedGainCurve::Stats& stats) const {
+  using Region = InterpolatedGainCurve::GainCurveRegion;
+  const int duration_s =
+      stats.region_duration_frames / (1000 / kFrameDurationMs);
+
+  switch (stats.region) {
+    case Region::kIdentity: {
+      if (identity_histogram) {
+        metrics::HistogramAdd(identity_histogram, duration_s);
+      }
+      break;
+    }
+    case Region::kKnee: {
+      if (knee_histogram) {
+        metrics::HistogramAdd(knee_histogram, duration_s);
+      }
+      break;
+    }
+    case Region::kLimiter: {
+      if (limiter_histogram) {
+        metrics::HistogramAdd(limiter_histogram, duration_s);
+      }
+      break;
+    }
+    case Region::kSaturation: {
+      if (saturation_histogram) {
+        metrics::HistogramAdd(saturation_histogram, duration_s);
+      }
+      break;
+    }
+    default: {
+      RTC_DCHECK_NOTREACHED();
+    }
+  }
+}
+
+void InterpolatedGainCurve::UpdateStats(float input_level) const {
+  stats_.available = true;
+
+  GainCurveRegion region;
+
+  if (input_level <
approximation_params_x_[0]) {
+    stats_.look_ups_identity_region++;
+    region = GainCurveRegion::kIdentity;
+  } else if (input_level <
+             approximation_params_x_[kInterpolatedGainCurveKneePoints - 1]) {
+    stats_.look_ups_knee_region++;
+    region = GainCurveRegion::kKnee;
+  } else if (input_level < kMaxInputLevelLinear) {
+    stats_.look_ups_limiter_region++;
+    region = GainCurveRegion::kLimiter;
+  } else {
+    stats_.look_ups_saturation_region++;
+    region = GainCurveRegion::kSaturation;
+  }
+
+  if (region == stats_.region) {
+    ++stats_.region_duration_frames;
+  } else {
+    region_logger_.LogRegionStats(stats_);
+
+    stats_.region_duration_frames = 0;
+    stats_.region = region;
+  }
+}
+
+// Looks up a gain to apply given a non-negative input level.
+// The cost of this operation depends on the region in which `input_level`
+// falls.
+// For the identity and the saturation regions the cost is O(1).
+// For the other regions, namely knee and limiter, the cost is
+// O(2 + log2(kInterpolatedGainCurveTotalPoints)), plus O(1) for the
+// linear interpolation (one product and one sum).
+float InterpolatedGainCurve::LookUpGainToApply(float input_level) const {
+  UpdateStats(input_level);
+
+  if (input_level <= approximation_params_x_[0]) {
+    // Identity region.
+    return 1.0f;
+  }
+
+  if (input_level >= kMaxInputLevelLinear) {
+    // Saturating lower bound. The saturating samples exactly hit the clipping
+    // level. This method has the lowest harmonic distortion, but it may
+    // reduce the amplitude of the non-saturating samples too much.
+    return 32768.f / input_level;
+  }
+
+  // Knee and limiter regions; find the linear piece index. Spelling
+  // out the complete type was the only way to silence both the clang
+  // plugin and the windows compilers.
+  std::array<float, kInterpolatedGainCurveTotalPoints>::const_iterator it =
+      std::lower_bound(approximation_params_x_.begin(),
+                       approximation_params_x_.end(), input_level);
+  const size_t index = std::distance(approximation_params_x_.begin(), it) - 1;
+  RTC_DCHECK_LE(0, index);
+  RTC_DCHECK_LT(index, approximation_params_m_.size());
+  RTC_DCHECK_LE(approximation_params_x_[index], input_level);
+  if (index < approximation_params_m_.size() - 1) {
+    RTC_DCHECK_LE(input_level, approximation_params_x_[index + 1]);
+  }
+
+  // Piece-wise linear interpolation.
+  const float gain = approximation_params_m_[index] * input_level +
+                     approximation_params_q_[index];
+  RTC_DCHECK_LE(0.f, gain);
+  return gain;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h
new file mode 100644
index 0000000000..8dd3e48f21
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h
@@ -0,0 +1,152 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_INTERPOLATED_GAIN_CURVE_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_INTERPOLATED_GAIN_CURVE_H_
+
+#include <array>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "rtc_base/gtest_prod_util.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+constexpr float kInputLevelScalingFactor = 32768.0f;
+
+// Defined as DbfsToLinear(kLimiterMaxInputLevelDbFs).
+constexpr float kMaxInputLevelLinear = static_cast<float>(36766.300710566735);
+
+// Interpolated gain curve using under-approximation to avoid saturation.
+//
+// The goal of this class is to allow fast look-ups that return accurate
+// estimates of the gain to apply for a given estimated input level.
+class InterpolatedGainCurve {
+ public:
+  enum class GainCurveRegion {
+    kIdentity = 0,
+    kKnee = 1,
+    kLimiter = 2,
+    kSaturation = 3
+  };
+
+  struct Stats {
+    // Region in which the output level equals the input one.
+    size_t look_ups_identity_region = 0;
+    // Smoothing between the identity and the limiter regions.
+    size_t look_ups_knee_region = 0;
+    // Limiter region in which the output and input levels are linearly
+    // related.
+    size_t look_ups_limiter_region = 0;
+    // Region in which saturation may occur since the input level is beyond
+    // the maximum expected by the limiter.
+    size_t look_ups_saturation_region = 0;
+    // True if stats have been populated.
+    bool available = false;
+
+    // The current region, and for how many frames the level has been
+    // in that region.
+    GainCurveRegion region = GainCurveRegion::kIdentity;
+    int64_t region_duration_frames = 0;
+  };
+
+  InterpolatedGainCurve(ApmDataDumper* apm_data_dumper,
+                        absl::string_view histogram_name_prefix);
+  ~InterpolatedGainCurve();
+
+  InterpolatedGainCurve(const InterpolatedGainCurve&) = delete;
+  InterpolatedGainCurve& operator=(const InterpolatedGainCurve&) = delete;
+
+  Stats get_stats() const { return stats_; }
+
+  // Given a non-negative input level (linear scale), returns a scalar factor
+  // to apply to a sub-frame. Levels above kLimiterMaxInputLevelDbFs will be
+  // reduced to 0 dBFS after applying this gain.
+  float LookUpGainToApply(float input_level) const;
+
+ private:
+  // For comparing 'approximation_params_*_' with ones computed by
+  // ComputeInterpolatedGainCurve.
+ FRIEND_TEST_ALL_PREFIXES(GainController2InterpolatedGainCurve, + CheckApproximationParams); + + struct RegionLogger { + metrics::Histogram* identity_histogram; + metrics::Histogram* knee_histogram; + metrics::Histogram* limiter_histogram; + metrics::Histogram* saturation_histogram; + + RegionLogger(absl::string_view identity_histogram_name, + absl::string_view knee_histogram_name, + absl::string_view limiter_histogram_name, + absl::string_view saturation_histogram_name); + + ~RegionLogger(); + + void LogRegionStats(const InterpolatedGainCurve::Stats& stats) const; + } region_logger_; + + void UpdateStats(float input_level) const; + + ApmDataDumper* const apm_data_dumper_; + + static constexpr std::array + approximation_params_x_ = { + {30057.296875, 30148.986328125, 30240.67578125, 30424.052734375, + 30607.4296875, 30790.806640625, 30974.18359375, 31157.560546875, + 31340.939453125, 31524.31640625, 31707.693359375, 31891.0703125, + 32074.447265625, 32257.82421875, 32441.201171875, 32624.580078125, + 32807.95703125, 32991.33203125, 33174.7109375, 33358.08984375, + 33541.46484375, 33724.84375, 33819.53515625, 34009.5390625, + 34200.05859375, 34389.81640625, 34674.48828125, 35054.375, + 35434.86328125, 35814.81640625, 36195.16796875, 36575.03125}}; + static constexpr std::array + approximation_params_m_ = { + {-3.515235675877192989e-07, -1.050251626111275982e-06, + -2.085213736791047268e-06, -3.443004743530764244e-06, + -4.773849468620028347e-06, -6.077375928725814447e-06, + -7.353257842623861507e-06, -8.601219633419532329e-06, + -9.821013009059242904e-06, -1.101243378798244521e-05, + -1.217532644659513608e-05, -1.330956911260727793e-05, + -1.441507538402220234e-05, -1.549179251014720649e-05, + -1.653970684856176376e-05, -1.755882840370759368e-05, + -1.854918446042574942e-05, -1.951086778717581183e-05, + -2.044398024736437947e-05, -2.1348627342376858e-05, + -2.222496914328075945e-05, -2.265374678245279938e-05, + -2.242570917587727308e-05, -2.220122041762806475e-05, + -2.19802095671184361e-05, -2.176260204578284174e-05, + -2.133731686626560986e-05, -2.092481918225530535e-05, + -2.052459603874012828e-05, -2.013615448959171772e-05, + -1.975903069251216948e-05, -1.939277899509761482e-05}}; + + static constexpr std::array + approximation_params_q_ = { + {1.010565876960754395, 1.031631827354431152, 1.062929749488830566, + 1.104239225387573242, 1.144973039627075195, 1.185109615325927734, + 1.224629044532775879, 1.263512492179870605, 1.301741957664489746, + 1.339300632476806641, 1.376173257827758789, 1.412345528602600098, + 1.447803974151611328, 1.482536554336547852, 1.516532182693481445, + 1.549780607223510742, 1.582272171974182129, 1.613999366760253906, + 1.644955039024353027, 1.675132393836975098, 1.704526185989379883, + 1.718986630439758301, 1.711274504661560059, 1.703639745712280273, + 1.696081161499023438, 1.688597679138183594, 1.673851132392883301, + 1.659391283988952637, 1.645209431648254395, 1.631297469139099121, + 1.617647409439086914, 1.604251742362976074}}; + + // Stats. 
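+  // Mutable because the stats are updated by UpdateStats(), which is invoked
+  // from the const method LookUpGainToApply().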
+ mutable Stats stats_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_INTERPOLATED_GAIN_CURVE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc new file mode 100644 index 0000000000..7861ae997d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/interpolated_gain_curve.h" + +#include +#include +#include + +#include "api/array_view.h" +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/compute_interpolated_gain_curve.h" +#include "modules/audio_processing/agc2/limiter_db_gain_curve.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr double kLevelEpsilon = 1e-2 * kMaxAbsFloatS16Value; +constexpr float kInterpolatedGainCurveTolerance = 1.f / 32768.f; +ApmDataDumper apm_data_dumper(0); +static_assert(std::is_trivially_destructible::value, ""); +const LimiterDbGainCurve limiter; + +} // namespace + +TEST(GainController2InterpolatedGainCurve, CreateUse) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + kLevelEpsilon, DbfsToFloatS16(limiter.max_input_level_db() + 1), 500); + for (const auto level : levels) { + EXPECT_GE(igc.LookUpGainToApply(level), 0.0f); + } +} + +TEST(GainController2InterpolatedGainCurve, CheckValidOutput) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + kLevelEpsilon, limiter.max_input_level_linear() * 2.0, 500); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + const float gain = igc.LookUpGainToApply(level); + EXPECT_LE(0.0f, gain); + EXPECT_LE(gain, 1.0f); + } +} + +TEST(GainController2InterpolatedGainCurve, CheckMonotonicity) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + kLevelEpsilon, limiter.max_input_level_linear() + kLevelEpsilon + 0.5, + 500); + float prev_gain = igc.LookUpGainToApply(0.0f); + for (const auto level : levels) { + const float gain = igc.LookUpGainToApply(level); + EXPECT_GE(prev_gain, gain); + prev_gain = gain; + } +} + +TEST(GainController2InterpolatedGainCurve, CheckApproximation) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + kLevelEpsilon, limiter.max_input_level_linear() - kLevelEpsilon, 500); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + EXPECT_LT( + std::fabs(limiter.GetGainLinear(level) - igc.LookUpGainToApply(level)), + kInterpolatedGainCurveTolerance); + } +} + +TEST(GainController2InterpolatedGainCurve, CheckRegionBoundaries) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const std::vector levels{ + {kLevelEpsilon, limiter.knee_start_linear() + kLevelEpsilon, + limiter.limiter_start_linear() + 
kLevelEpsilon, + limiter.max_input_level_linear() + kLevelEpsilon}}; + for (const auto level : levels) { + igc.LookUpGainToApply(level); + } + + const auto stats = igc.get_stats(); + EXPECT_EQ(1ul, stats.look_ups_identity_region); + EXPECT_EQ(1ul, stats.look_ups_knee_region); + EXPECT_EQ(1ul, stats.look_ups_limiter_region); + EXPECT_EQ(1ul, stats.look_ups_saturation_region); +} + +TEST(GainController2InterpolatedGainCurve, CheckIdentityRegion) { + constexpr size_t kNumSteps = 10; + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = + test::LinSpace(kLevelEpsilon, limiter.knee_start_linear(), kNumSteps); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + EXPECT_EQ(1.0f, igc.LookUpGainToApply(level)); + } + + const auto stats = igc.get_stats(); + EXPECT_EQ(kNumSteps - 1, stats.look_ups_identity_region); + EXPECT_EQ(1ul, stats.look_ups_knee_region); + EXPECT_EQ(0ul, stats.look_ups_limiter_region); + EXPECT_EQ(0ul, stats.look_ups_saturation_region); +} + +TEST(GainController2InterpolatedGainCurve, CheckNoOverApproximationKnee) { + constexpr size_t kNumSteps = 10; + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = + test::LinSpace(limiter.knee_start_linear() + kLevelEpsilon, + limiter.limiter_start_linear(), kNumSteps); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + // Small tolerance added (needed because comparing a float with a double). + EXPECT_LE(igc.LookUpGainToApply(level), + limiter.GetGainLinear(level) + 1e-7); + } + + const auto stats = igc.get_stats(); + EXPECT_EQ(0ul, stats.look_ups_identity_region); + EXPECT_EQ(kNumSteps - 1, stats.look_ups_knee_region); + EXPECT_EQ(1ul, stats.look_ups_limiter_region); + EXPECT_EQ(0ul, stats.look_ups_saturation_region); +} + +TEST(GainController2InterpolatedGainCurve, CheckNoOverApproximationBeyondKnee) { + constexpr size_t kNumSteps = 10; + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + limiter.limiter_start_linear() + kLevelEpsilon, + limiter.max_input_level_linear() - kLevelEpsilon, kNumSteps); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + // Small tolerance added (needed because comparing a float with a double). 
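+    // (1e-7 is about one ULP of a float close to 1.0, i.e. 2^-23 ~= 1.2e-7.)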
+    EXPECT_LE(igc.LookUpGainToApply(level),
+              limiter.GetGainLinear(level) + 1e-7);
+  }
+
+  const auto stats = igc.get_stats();
+  EXPECT_EQ(0ul, stats.look_ups_identity_region);
+  EXPECT_EQ(0ul, stats.look_ups_knee_region);
+  EXPECT_EQ(kNumSteps, stats.look_ups_limiter_region);
+  EXPECT_EQ(0ul, stats.look_ups_saturation_region);
+}
+
+TEST(GainController2InterpolatedGainCurve,
+     CheckNoOverApproximationWithSaturation) {
+  constexpr size_t kNumSteps = 3;
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  const auto levels = test::LinSpace(
+      limiter.max_input_level_linear() + kLevelEpsilon,
+      limiter.max_input_level_linear() + kLevelEpsilon + 0.5, kNumSteps);
+  for (const auto level : levels) {
+    SCOPED_TRACE(std::to_string(level));
+    EXPECT_LE(igc.LookUpGainToApply(level), limiter.GetGainLinear(level));
+  }
+
+  const auto stats = igc.get_stats();
+  EXPECT_EQ(0ul, stats.look_ups_identity_region);
+  EXPECT_EQ(0ul, stats.look_ups_knee_region);
+  EXPECT_EQ(0ul, stats.look_ups_limiter_region);
+  EXPECT_EQ(kNumSteps, stats.look_ups_saturation_region);
+}
+
+TEST(GainController2InterpolatedGainCurve, CheckApproximationParams) {
+  test::InterpolatedParameters parameters =
+      test::ComputeInterpolatedGainCurveApproximationParams();
+
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  for (size_t i = 0; i < kInterpolatedGainCurveTotalPoints; ++i) {
+    // The tolerance levels are chosen to account for deviations due
+    // to computing with single precision floating point numbers.
+    EXPECT_NEAR(igc.approximation_params_x_[i],
+                parameters.computed_approximation_params_x[i], 0.9f);
+    EXPECT_NEAR(igc.approximation_params_m_[i],
+                parameters.computed_approximation_params_m[i], 0.00001f);
+    EXPECT_NEAR(igc.approximation_params_q_[i],
+                parameters.computed_approximation_params_q[i], 0.001f);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc
new file mode 100644
index 0000000000..7a1e2202be
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/limiter.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+// This constant affects the way scaling factors are interpolated for the first
+// sub-frame of a frame. Only when the first sub-frame has an estimated level
+// greater than that of the previously analyzed sub-frame is linear
+// interpolation replaced with a power function, which reduces the chance of
+// over-shooting (and hence saturation), at the cost of reducing the
+// effectiveness of the fixed gain.
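+// For example, with p = 8 the interpolated factor drops towards the lower
+// `current_factor` almost immediately ((1 - i/n)^8 is ~0.43 after only 10% of
+// the sub-frame samples, whereas linear interpolation would still be at 0.9),
+// so the smaller attack gain takes effect sooner.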
+constexpr float kAttackFirstSubframeInterpolationPower = 8.0f;
+
+void InterpolateFirstSubframe(float last_factor,
+                              float current_factor,
+                              rtc::ArrayView<float> subframe) {
+  const int n = rtc::dchecked_cast<int>(subframe.size());
+  constexpr float p = kAttackFirstSubframeInterpolationPower;
+  for (int i = 0; i < n; ++i) {
+    // Note: the cast makes the ramp use float division; with integer division
+    // every sample would get `last_factor`.
+    subframe[i] =
+        std::pow(1.f - static_cast<float>(i) / n, p) *
+            (last_factor - current_factor) +
+        current_factor;
+  }
+}
+
+void ComputePerSampleSubframeFactors(
+    const std::array<float, kSubFramesInFrame + 1>& scaling_factors,
+    int samples_per_channel,
+    rtc::ArrayView<float> per_sample_scaling_factors) {
+  const int num_subframes = scaling_factors.size() - 1;
+  const int subframe_size =
+      rtc::CheckedDivExact(samples_per_channel, num_subframes);
+
+  // Handle first sub-frame differently in case of attack.
+  const bool is_attack = scaling_factors[0] > scaling_factors[1];
+  if (is_attack) {
+    InterpolateFirstSubframe(
+        scaling_factors[0], scaling_factors[1],
+        rtc::ArrayView<float>(
+            per_sample_scaling_factors.subview(0, subframe_size)));
+  }
+
+  for (int i = is_attack ? 1 : 0; i < num_subframes; ++i) {
+    const int subframe_start = i * subframe_size;
+    const float scaling_start = scaling_factors[i];
+    const float scaling_end = scaling_factors[i + 1];
+    const float scaling_diff = (scaling_end - scaling_start) / subframe_size;
+    for (int j = 0; j < subframe_size; ++j) {
+      per_sample_scaling_factors[subframe_start + j] =
+          scaling_start + scaling_diff * j;
+    }
+  }
+}
+
+void ScaleSamples(rtc::ArrayView<const float> per_sample_scaling_factors,
+                  AudioFrameView<float> signal) {
+  const int samples_per_channel = signal.samples_per_channel();
+  RTC_DCHECK_EQ(samples_per_channel, per_sample_scaling_factors.size());
+  for (int i = 0; i < signal.num_channels(); ++i) {
+    rtc::ArrayView<float> channel = signal.channel(i);
+    for (int j = 0; j < samples_per_channel; ++j) {
+      channel[j] = rtc::SafeClamp(channel[j] * per_sample_scaling_factors[j],
+                                  kMinFloatS16Value, kMaxFloatS16Value);
+    }
+  }
+}
+
+void CheckLimiterSampleRate(int sample_rate_hz) {
+  // Check that per_sample_scaling_factors_ is large enough.
+  RTC_DCHECK_LE(sample_rate_hz,
+                kMaximalNumberOfSamplesPerChannel * 1000 / kFrameDurationMs);
+}
+
+}  // namespace
+
+Limiter::Limiter(int sample_rate_hz,
+                 ApmDataDumper* apm_data_dumper,
+                 absl::string_view histogram_name)
+    : interp_gain_curve_(apm_data_dumper, histogram_name),
+      level_estimator_(sample_rate_hz, apm_data_dumper),
+      apm_data_dumper_(apm_data_dumper) {
+  CheckLimiterSampleRate(sample_rate_hz);
+}
+
+Limiter::~Limiter() = default;
+
+void Limiter::Process(AudioFrameView<float> signal) {
+  const std::array<float, kSubFramesInFrame> level_estimate =
+      level_estimator_.ComputeLevel(signal);
+
+  RTC_DCHECK_EQ(level_estimate.size() + 1, scaling_factors_.size());
+  scaling_factors_[0] = last_scaling_factor_;
+  std::transform(level_estimate.begin(), level_estimate.end(),
+                 scaling_factors_.begin() + 1, [this](float x) {
+                   return interp_gain_curve_.LookUpGainToApply(x);
+                 });
+
+  const int samples_per_channel = signal.samples_per_channel();
+  RTC_DCHECK_LE(samples_per_channel, kMaximalNumberOfSamplesPerChannel);
+
+  auto per_sample_scaling_factors = rtc::ArrayView<float>(
+      &per_sample_scaling_factors_[0], samples_per_channel);
+  ComputePerSampleSubframeFactors(scaling_factors_, samples_per_channel,
+                                  per_sample_scaling_factors);
+  ScaleSamples(per_sample_scaling_factors, signal);
+
+  last_scaling_factor_ = scaling_factors_.back();
+
+  // Dump data for debug.
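+  // (These dumps are no-ops unless WEBRTC_APM_DEBUG_DUMP is enabled at build
+  // time; the Mozilla build sets it to 0.)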
+ apm_data_dumper_->DumpRaw("agc2_limiter_last_scaling_factor", + last_scaling_factor_); + apm_data_dumper_->DumpRaw( + "agc2_limiter_region", + static_cast(interp_gain_curve_.get_stats().region)); +} + +InterpolatedGainCurve::Stats Limiter::GetGainCurveStats() const { + return interp_gain_curve_.get_stats(); +} + +void Limiter::SetSampleRate(int sample_rate_hz) { + CheckLimiterSampleRate(sample_rate_hz); + level_estimator_.SetSampleRate(sample_rate_hz); +} + +void Limiter::Reset() { + level_estimator_.Reset(); +} + +float Limiter::LastAudioLevel() const { + return level_estimator_.LastAudioLevel(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter.h b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.h new file mode 100644 index 0000000000..d4d556349c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_LIMITER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_LIMITER_H_ + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h" +#include "modules/audio_processing/agc2/interpolated_gain_curve.h" +#include "modules/audio_processing/include/audio_frame_view.h" + +namespace webrtc { +class ApmDataDumper; + +class Limiter { + public: + Limiter(int sample_rate_hz, + ApmDataDumper* apm_data_dumper, + absl::string_view histogram_name_prefix); + Limiter(const Limiter& limiter) = delete; + Limiter& operator=(const Limiter& limiter) = delete; + ~Limiter(); + + // Applies limiter and hard-clipping to `signal`. + void Process(AudioFrameView signal); + InterpolatedGainCurve::Stats GetGainCurveStats() const; + + // Supported rates must be + // * supported by FixedDigitalLevelEstimator + // * below kMaximalNumberOfSamplesPerChannel*1000/kFrameDurationMs + // so that samples_per_channel fit in the + // per_sample_scaling_factors_ array. + void SetSampleRate(int sample_rate_hz); + + // Resets the internal state. + void Reset(); + + float LastAudioLevel() const; + + private: + const InterpolatedGainCurve interp_gain_curve_; + FixedDigitalLevelEstimator level_estimator_; + ApmDataDumper* const apm_data_dumper_ = nullptr; + + // Work array containing the sub-frame scaling factors to be interpolated. + std::array scaling_factors_ = {}; + std::array + per_sample_scaling_factors_ = {}; + float last_scaling_factor_ = 1.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_LIMITER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc new file mode 100644 index 0000000000..d47c0b2e17 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/limiter_db_gain_curve.h" + +#include + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +double ComputeKneeStart(double max_input_level_db, + double knee_smoothness_db, + double compression_ratio) { + RTC_CHECK_LT((compression_ratio - 1.0) * knee_smoothness_db / + (2.0 * compression_ratio), + max_input_level_db); + return -knee_smoothness_db / 2.0 - + max_input_level_db / (compression_ratio - 1.0); +} + +std::array ComputeKneeRegionPolynomial(double knee_start_dbfs, + double knee_smoothness_db, + double compression_ratio) { + const double a = (1.0 - compression_ratio) / + (2.0 * knee_smoothness_db * compression_ratio); + const double b = 1.0 - 2.0 * a * knee_start_dbfs; + const double c = a * knee_start_dbfs * knee_start_dbfs; + return {{a, b, c}}; +} + +double ComputeLimiterD1(double max_input_level_db, double compression_ratio) { + return (std::pow(10.0, -max_input_level_db / (20.0 * compression_ratio)) * + (1.0 - compression_ratio) / compression_ratio) / + kMaxAbsFloatS16Value; +} + +constexpr double ComputeLimiterD2(double compression_ratio) { + return (1.0 - 2.0 * compression_ratio) / compression_ratio; +} + +double ComputeLimiterI2(double max_input_level_db, + double compression_ratio, + double gain_curve_limiter_i1) { + RTC_CHECK_NE(gain_curve_limiter_i1, 0.f); + return std::pow(10.0, -max_input_level_db / (20.0 * compression_ratio)) / + gain_curve_limiter_i1 / + std::pow(kMaxAbsFloatS16Value, gain_curve_limiter_i1 - 1); +} + +} // namespace + +LimiterDbGainCurve::LimiterDbGainCurve() + : max_input_level_linear_(DbfsToFloatS16(max_input_level_db_)), + knee_start_dbfs_(ComputeKneeStart(max_input_level_db_, + knee_smoothness_db_, + compression_ratio_)), + knee_start_linear_(DbfsToFloatS16(knee_start_dbfs_)), + limiter_start_dbfs_(knee_start_dbfs_ + knee_smoothness_db_), + limiter_start_linear_(DbfsToFloatS16(limiter_start_dbfs_)), + knee_region_polynomial_(ComputeKneeRegionPolynomial(knee_start_dbfs_, + knee_smoothness_db_, + compression_ratio_)), + gain_curve_limiter_d1_( + ComputeLimiterD1(max_input_level_db_, compression_ratio_)), + gain_curve_limiter_d2_(ComputeLimiterD2(compression_ratio_)), + gain_curve_limiter_i1_(1.0 / compression_ratio_), + gain_curve_limiter_i2_(ComputeLimiterI2(max_input_level_db_, + compression_ratio_, + gain_curve_limiter_i1_)) { + static_assert(knee_smoothness_db_ > 0.0f, ""); + static_assert(compression_ratio_ > 1.0f, ""); + RTC_CHECK_GE(max_input_level_db_, knee_start_dbfs_ + knee_smoothness_db_); +} + +constexpr double LimiterDbGainCurve::max_input_level_db_; +constexpr double LimiterDbGainCurve::knee_smoothness_db_; +constexpr double LimiterDbGainCurve::compression_ratio_; + +double LimiterDbGainCurve::GetOutputLevelDbfs(double input_level_dbfs) const { + if (input_level_dbfs < knee_start_dbfs_) { + return input_level_dbfs; + } else if (input_level_dbfs < limiter_start_dbfs_) { + return GetKneeRegionOutputLevelDbfs(input_level_dbfs); + } + return GetCompressorRegionOutputLevelDbfs(input_level_dbfs); +} + +double LimiterDbGainCurve::GetGainLinear(double input_level_linear) const { + if (input_level_linear < knee_start_linear_) { + return 1.0; + } + return DbfsToFloatS16( + 
GetOutputLevelDbfs(FloatS16ToDbfs(input_level_linear))) / + input_level_linear; +} + +// Computes the first derivative of GetGainLinear() in `x`. +double LimiterDbGainCurve::GetGainFirstDerivativeLinear(double x) const { + // Beyond-knee region only. + RTC_CHECK_GE(x, limiter_start_linear_ - 1e-7 * kMaxAbsFloatS16Value); + return gain_curve_limiter_d1_ * + std::pow(x / kMaxAbsFloatS16Value, gain_curve_limiter_d2_); +} + +// Computes the integral of GetGainLinear() in the range [x0, x1]. +double LimiterDbGainCurve::GetGainIntegralLinear(double x0, double x1) const { + RTC_CHECK_LE(x0, x1); // Valid interval. + RTC_CHECK_GE(x0, limiter_start_linear_); // Beyond-knee region only. + auto limiter_integral = [this](const double& x) { + return gain_curve_limiter_i2_ * std::pow(x, gain_curve_limiter_i1_); + }; + return limiter_integral(x1) - limiter_integral(x0); +} + +double LimiterDbGainCurve::GetKneeRegionOutputLevelDbfs( + double input_level_dbfs) const { + return knee_region_polynomial_[0] * input_level_dbfs * input_level_dbfs + + knee_region_polynomial_[1] * input_level_dbfs + + knee_region_polynomial_[2]; +} + +double LimiterDbGainCurve::GetCompressorRegionOutputLevelDbfs( + double input_level_dbfs) const { + return (input_level_dbfs - max_input_level_db_) / compression_ratio_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h new file mode 100644 index 0000000000..9086e26739 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_LIMITER_DB_GAIN_CURVE_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_LIMITER_DB_GAIN_CURVE_H_ + +#include + +#include "modules/audio_processing/agc2/agc2_testing_common.h" + +namespace webrtc { + +// A class for computing a limiter gain curve (in dB scale) given a set of +// hard-coded parameters (namely, kLimiterDbGainCurveMaxInputLevelDbFs, +// kLimiterDbGainCurveKneeSmoothnessDb, and +// kLimiterDbGainCurveCompressionRatio). The generated curve consists of four +// regions: identity (linear), knee (quadratic polynomial), compression +// (linear), saturation (linear). The aforementioned constants are used to shape +// the different regions. +class LimiterDbGainCurve { + public: + LimiterDbGainCurve(); + + double max_input_level_db() const { return max_input_level_db_; } + double max_input_level_linear() const { return max_input_level_linear_; } + double knee_start_linear() const { return knee_start_linear_; } + double limiter_start_linear() const { return limiter_start_linear_; } + + // These methods can be marked 'constexpr' in C++ 14. 
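+  // Note that GetGainFirstDerivativeLinear() and GetGainIntegralLinear() are
+  // only defined in the beyond-knee region; the implementation enforces this
+  // with RTC_CHECKs.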
+ double GetOutputLevelDbfs(double input_level_dbfs) const; + double GetGainLinear(double input_level_linear) const; + double GetGainFirstDerivativeLinear(double x) const; + double GetGainIntegralLinear(double x0, double x1) const; + + private: + double GetKneeRegionOutputLevelDbfs(double input_level_dbfs) const; + double GetCompressorRegionOutputLevelDbfs(double input_level_dbfs) const; + + static constexpr double max_input_level_db_ = test::kLimiterMaxInputLevelDbFs; + static constexpr double knee_smoothness_db_ = test::kLimiterKneeSmoothnessDb; + static constexpr double compression_ratio_ = test::kLimiterCompressionRatio; + + const double max_input_level_linear_; + + // Do not modify signal with level <= knee_start_dbfs_. + const double knee_start_dbfs_; + const double knee_start_linear_; + + // The upper end of the knee region, which is between knee_start_dbfs_ and + // limiter_start_dbfs_. + const double limiter_start_dbfs_; + const double limiter_start_linear_; + + // Coefficients {a, b, c} of the knee region polynomial + // ax^2 + bx + c in the DB scale. + const std::array knee_region_polynomial_; + + // Parameters for the computation of the first derivative of GetGainLinear(). + const double gain_curve_limiter_d1_; + const double gain_curve_limiter_d2_; + + // Parameters for the computation of the integral of GetGainLinear(). + const double gain_curve_limiter_i1_; + const double gain_curve_limiter_i2_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_LIMITER_DB_GAIN_CURVE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc new file mode 100644 index 0000000000..049c8d568e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/limiter_db_gain_curve.h" + +#include "rtc_base/gunit.h" + +namespace webrtc { + +TEST(FixedDigitalGainController2Limiter, ConstructDestruct) { + LimiterDbGainCurve l; +} + +TEST(FixedDigitalGainController2Limiter, GainCurveShouldBeMonotone) { + LimiterDbGainCurve l; + float last_output_level = 0.f; + bool has_last_output_level = false; + for (float level = -90.f; level <= l.max_input_level_db(); level += 0.5f) { + const float current_output_level = l.GetOutputLevelDbfs(level); + if (!has_last_output_level) { + last_output_level = current_output_level; + has_last_output_level = true; + } + EXPECT_LE(last_output_level, current_output_level); + last_output_level = current_output_level; + } +} + +TEST(FixedDigitalGainController2Limiter, GainCurveShouldBeContinuous) { + LimiterDbGainCurve l; + float last_output_level = 0.f; + bool has_last_output_level = false; + constexpr float kMaxDelta = 0.5f; + for (float level = -90.f; level <= l.max_input_level_db(); level += 0.5f) { + const float current_output_level = l.GetOutputLevelDbfs(level); + if (!has_last_output_level) { + last_output_level = current_output_level; + has_last_output_level = true; + } + EXPECT_LE(current_output_level, last_output_level + kMaxDelta); + last_output_level = current_output_level; + } +} + +TEST(FixedDigitalGainController2Limiter, OutputGainShouldBeLessThanFullScale) { + LimiterDbGainCurve l; + for (float level = -90.f; level <= l.max_input_level_db(); level += 0.5f) { + const float current_output_level = l.GetOutputLevelDbfs(level); + EXPECT_LE(current_output_level, 0.f); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc new file mode 100644 index 0000000000..e662a7fc89 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/limiter.h" + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/agc2_testing_common.h" +#include "modules/audio_processing/agc2/vector_float_frame.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { + +TEST(Limiter, LimiterShouldConstructAndRun) { + const int sample_rate_hz = 48000; + ApmDataDumper apm_data_dumper(0); + + Limiter limiter(sample_rate_hz, &apm_data_dumper, ""); + + VectorFloatFrame vectors_with_float_frame(1, sample_rate_hz / 100, + kMaxAbsFloatS16Value); + limiter.Process(vectors_with_float_frame.float_frame_view()); +} + +TEST(Limiter, OutputVolumeAboveThreshold) { + const int sample_rate_hz = 48000; + const float input_level = + (kMaxAbsFloatS16Value + DbfsToFloatS16(test::kLimiterMaxInputLevelDbFs)) / + 2.f; + ApmDataDumper apm_data_dumper(0); + + Limiter limiter(sample_rate_hz, &apm_data_dumper, ""); + + // Give the level estimator time to adapt. 
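+  // (The level estimate is smoothed across frames, so several 10 ms frames of
+  // constant input are processed before the output is checked.)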
+ for (int i = 0; i < 5; ++i) { + VectorFloatFrame vectors_with_float_frame(1, sample_rate_hz / 100, + input_level); + limiter.Process(vectors_with_float_frame.float_frame_view()); + } + + VectorFloatFrame vectors_with_float_frame(1, sample_rate_hz / 100, + input_level); + limiter.Process(vectors_with_float_frame.float_frame_view()); + rtc::ArrayView channel = + vectors_with_float_frame.float_frame_view().channel(0); + + for (const auto& sample : channel) { + EXPECT_LT(0.9f * kMaxAbsFloatS16Value, sample); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc new file mode 100644 index 0000000000..691513b509 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/noise_level_estimator.h" + +#include + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr int kFramesPerSecond = 100; + +float FrameEnergy(const AudioFrameView& audio) { + float energy = 0.0f; + for (int k = 0; k < audio.num_channels(); ++k) { + float channel_energy = + std::accumulate(audio.channel(k).begin(), audio.channel(k).end(), 0.0f, + [](float a, float b) -> float { return a + b * b; }); + energy = std::max(channel_energy, energy); + } + return energy; +} + +float EnergyToDbfs(float signal_energy, int num_samples) { + RTC_DCHECK_GE(signal_energy, 0.0f); + const float rms_square = signal_energy / num_samples; + constexpr float kMinDbfs = -90.30899869919436f; + if (rms_square <= 1.0f) { + return kMinDbfs; + } + return 10.0f * std::log10(rms_square) + kMinDbfs; +} + +// Updates the noise floor with instant decay and slow attack. This tuning is +// specific for AGC2, so that (i) it can promptly increase the gain if the noise +// floor drops (instant decay) and (ii) in case of music or fast speech, due to +// which the noise floor can be overestimated, the gain reduction is slowed +// down. +float SmoothNoiseFloorEstimate(float current_estimate, float new_estimate) { + constexpr float kAttack = 0.5f; + if (current_estimate < new_estimate) { + // Attack phase. + return kAttack * new_estimate + (1.0f - kAttack) * current_estimate; + } + // Instant attack. + return new_estimate; +} + +class NoiseFloorEstimator : public NoiseLevelEstimator { + public: + // Update the noise floor every 5 seconds. + static constexpr int kUpdatePeriodNumFrames = 500; + static_assert(kUpdatePeriodNumFrames >= 200, + "A too small value may cause noise level overestimation."); + static_assert(kUpdatePeriodNumFrames <= 1500, + "A too large value may make AGC2 slow at reacting to increased " + "noise levels."); + + NoiseFloorEstimator(ApmDataDumper* data_dumper) : data_dumper_(data_dumper) { + RTC_DCHECK(data_dumper_); + // Initially assume that 48 kHz will be used. `Analyze()` will detect the + // used sample rate and call `Initialize()` again if needed. 
+ Initialize(/*sample_rate_hz=*/48000); + } + NoiseFloorEstimator(const NoiseFloorEstimator&) = delete; + NoiseFloorEstimator& operator=(const NoiseFloorEstimator&) = delete; + ~NoiseFloorEstimator() = default; + + float Analyze(const AudioFrameView& frame) override { + // Detect sample rate changes. + const int sample_rate_hz = + static_cast(frame.samples_per_channel() * kFramesPerSecond); + if (sample_rate_hz != sample_rate_hz_) { + Initialize(sample_rate_hz); + } + + const float frame_energy = FrameEnergy(frame); + if (frame_energy <= min_noise_energy_) { + // Ignore frames when muted or below the minimum measurable energy. + if (data_dumper_) + data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level", + noise_energy_); + return EnergyToDbfs(noise_energy_, + static_cast(frame.samples_per_channel())); + } + + if (preliminary_noise_energy_set_) { + preliminary_noise_energy_ = + std::min(preliminary_noise_energy_, frame_energy); + } else { + preliminary_noise_energy_ = frame_energy; + preliminary_noise_energy_set_ = true; + } + if (data_dumper_) + data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level", + preliminary_noise_energy_); + + if (counter_ == 0) { + // Full period observed. + first_period_ = false; + // Update the estimated noise floor energy with the preliminary + // estimation. + noise_energy_ = SmoothNoiseFloorEstimate( + /*current_estimate=*/noise_energy_, + /*new_estimate=*/preliminary_noise_energy_); + // Reset for a new observation period. + counter_ = kUpdatePeriodNumFrames; + preliminary_noise_energy_set_ = false; + } else if (first_period_) { + // While analyzing the signal during the initial period, continuously + // update the estimated noise energy, which is monotonic. + noise_energy_ = preliminary_noise_energy_; + counter_--; + } else { + // During the observation period it's only allowed to lower the energy. + noise_energy_ = std::min(noise_energy_, preliminary_noise_energy_); + counter_--; + } + + float noise_rms_dbfs = EnergyToDbfs( + noise_energy_, static_cast(frame.samples_per_channel())); + if (data_dumper_) + data_dumper_->DumpRaw("agc2_noise_rms_dbfs", noise_rms_dbfs); + + return noise_rms_dbfs; + } + + private: + void Initialize(int sample_rate_hz) { + sample_rate_hz_ = sample_rate_hz; + first_period_ = true; + preliminary_noise_energy_set_ = false; + // Initialize the minimum noise energy to -84 dBFS. + min_noise_energy_ = sample_rate_hz * 2.0f * 2.0f / kFramesPerSecond; + preliminary_noise_energy_ = min_noise_energy_; + noise_energy_ = min_noise_energy_; + counter_ = kUpdatePeriodNumFrames; + } + + ApmDataDumper* const data_dumper_; + int sample_rate_hz_; + float min_noise_energy_; + bool first_period_; + bool preliminary_noise_energy_set_; + float preliminary_noise_energy_; + float noise_energy_; + int counter_; +}; + +} // namespace + +std::unique_ptr CreateNoiseFloorEstimator( + ApmDataDumper* data_dumper) { + return std::make_unique(data_dumper); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h new file mode 100644 index 0000000000..9f3b957486 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_ + +#include + +#include "modules/audio_processing/include/audio_frame_view.h" + +namespace webrtc { +class ApmDataDumper; + +// Noise level estimator interface. +class NoiseLevelEstimator { + public: + virtual ~NoiseLevelEstimator() = default; + // Analyzes a 10 ms `frame`, updates the noise level estimation and returns + // the value for the latter in dBFS. + virtual float Analyze(const AudioFrameView& frame) = 0; +}; + +// Creates a noise level estimator based on noise floor detection. +std::unique_ptr CreateNoiseFloorEstimator( + ApmDataDumper* data_dumper); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build new file mode 100644 index 0000000000..ba000d3862 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + 
DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += 
[ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("noise_level_estimator_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc new file mode 100644 index 0000000000..8168c5a229 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/noise_level_estimator.h" + +#include +#include +#include +#include + +#include "api/function_view.h" +#include "modules/audio_processing/agc2/agc2_testing_common.h" +#include "modules/audio_processing/agc2/vector_float_frame.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr int kNumIterations = 200; +constexpr int kFramesPerSecond = 100; + +// Runs the noise estimator on audio generated by 'sample_generator' +// for kNumIterations. Returns the last noise level estimate. +float RunEstimator(rtc::FunctionView sample_generator, + NoiseLevelEstimator& estimator, + int sample_rate_hz) { + const int samples_per_channel = + rtc::CheckedDivExact(sample_rate_hz, kFramesPerSecond); + VectorFloatFrame signal(1, samples_per_channel, 0.0f); + for (int i = 0; i < kNumIterations; ++i) { + AudioFrameView frame_view = signal.float_frame_view(); + for (int j = 0; j < samples_per_channel; ++j) { + frame_view.channel(0)[j] = sample_generator(); + } + estimator.Analyze(frame_view); + } + return estimator.Analyze(signal.float_frame_view()); +} + +class NoiseEstimatorParametrization : public ::testing::TestWithParam { + protected: + int sample_rate_hz() const { return GetParam(); } +}; + +// Checks that full scale white noise maps to about -5.5 dBFS. +TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithRandomNoise) { + ApmDataDumper data_dumper(0); + auto estimator = CreateNoiseFloorEstimator(&data_dumper); + + test::WhiteNoiseGenerator gen(/*min_amplitude=*/test::kMinS16, + /*max_amplitude=*/test::kMaxS16); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); + EXPECT_NEAR(noise_level_dbfs, -5.5f, 0.5f); +} + +// Checks that a full scale sine wave maps to about -3 dBFS. +TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithSineTone) { + ApmDataDumper data_dumper(0); + auto estimator = CreateNoiseFloorEstimator(&data_dumper); + + test::SineGenerator gen(/*amplitude=*/test::kMaxS16, /*frequency_hz=*/600.0f, + sample_rate_hz()); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); + EXPECT_NEAR(noise_level_dbfs, -3.0f, 0.1f); +} + +// Check that sufficiently spaced periodic pulses do not raise the estimated +// noise floor, which is determined by the amplitude of the non-pulse samples. 
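+// With kNoPulseAmplitude = 10, the expected floor is roughly
+// 20*log10(10/32768) ~= -70.3 dBFS, matching `expected_noise_floor_dbfs`.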
+TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithPulseTone) { + ApmDataDumper data_dumper(0); + auto estimator = CreateNoiseFloorEstimator(&data_dumper); + + constexpr float kNoPulseAmplitude = 10.0f; + test::PulseGenerator gen(/*pulse_amplitude=*/test::kMaxS16, kNoPulseAmplitude, + /*frequency_hz=*/20.0f, sample_rate_hz()); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); + const float expected_noise_floor_dbfs = + 20.0f * std::log10f(kNoPulseAmplitude / test::kMaxS16); + EXPECT_NEAR(noise_level_dbfs, expected_noise_floor_dbfs, 0.5f); +} + +INSTANTIATE_TEST_SUITE_P(GainController2NoiseEstimator, + NoiseEstimatorParametrization, + ::testing::Values(8000, 16000, 32000, 48000)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn new file mode 100644 index 0000000000..d709eb3699 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn @@ -0,0 +1,334 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../../webrtc.gni") + +rtc_library("rnn_vad") { + visibility = [ "../*" ] + sources = [ + "features_extraction.cc", + "features_extraction.h", + "rnn.cc", + "rnn.h", + ] + + defines = [] + if (rtc_build_with_neon && target_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad_common", + ":rnn_vad_layers", + ":rnn_vad_lp_residual", + ":rnn_vad_pitch", + ":rnn_vad_sequence_buffer", + ":rnn_vad_spectral_features", + "..:biquad_filter", + "..:cpu_features", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + "../../../../rtc_base:safe_conversions", + "//third_party/rnnoise:rnn_vad", + ] +} + +rtc_library("rnn_vad_auto_correlation") { + sources = [ + "auto_correlation.cc", + "auto_correlation.h", + ] + deps = [ + ":rnn_vad_common", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../utility:pffft_wrapper", + ] +} + +rtc_source_set("rnn_vad_common") { + # TODO(alessiob): Make this target visibility private. 
+ visibility = [ + ":*", + "..:vad_wrapper", + ] + sources = [ "common.h" ] + deps = [ + "../../../../rtc_base/system:arch", + "../../../../system_wrappers", + ] +} + +rtc_library("rnn_vad_lp_residual") { + sources = [ + "lp_residual.cc", + "lp_residual.h", + ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + ] +} + +rtc_source_set("rnn_vad_layers") { + sources = [ + "rnn_fc.cc", + "rnn_fc.h", + "rnn_gru.cc", + "rnn_gru.h", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad_common", + ":vector_math", + "..:cpu_features", + "../../../../api:array_view", + "../../../../api:function_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + "//third_party/rnnoise:rnn_vad", + ] + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":vector_math_avx2" ] + } + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_source_set("vector_math") { + sources = [ "vector_math.h" ] + deps = [ + "..:cpu_features", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base/system:arch", + ] +} + +if (current_cpu == "x86" || current_cpu == "x64") { + rtc_library("vector_math_avx2") { + sources = [ "vector_math_avx2.cc" ] + if (is_win && !build_with_mozilla) { + cflags = [ "/arch:AVX2" ] + } else { + cflags = [ + "-mavx2", + "-mfma", + ] + } + deps = [ + ":vector_math", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + ] + } +} + +rtc_library("rnn_vad_pitch") { + sources = [ + "pitch_search.cc", + "pitch_search.h", + "pitch_search_internal.cc", + "pitch_search_internal.h", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad_auto_correlation", + ":rnn_vad_common", + ":vector_math", + "..:cpu_features", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:gtest_prod", + "../../../../rtc_base:safe_compare", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base/system:arch", + ] + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":vector_math_avx2" ] + } +} + +rtc_source_set("rnn_vad_ring_buffer") { + sources = [ "ring_buffer.h" ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + ] +} + +rtc_source_set("rnn_vad_sequence_buffer") { + sources = [ "sequence_buffer.h" ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + ] +} + +rtc_library("rnn_vad_spectral_features") { + sources = [ + "spectral_features.cc", + "spectral_features.h", + "spectral_features_internal.cc", + "spectral_features_internal.h", + ] + deps = [ + ":rnn_vad_common", + ":rnn_vad_ring_buffer", + ":rnn_vad_symmetric_matrix_buffer", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + "../../utility:pffft_wrapper", + ] +} + +rtc_source_set("rnn_vad_symmetric_matrix_buffer") { + sources = [ "symmetric_matrix_buffer.h" ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + ] +} + +if (rtc_include_tests) { + rtc_library("test_utils") { + testonly = true + sources = [ + 
"test_utils.cc", + "test_utils.h", + ] + deps = [ + ":rnn_vad", + ":rnn_vad_common", + "../../../../api:array_view", + "../../../../api:scoped_refptr", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + "../../../../test:fileutils", + "../../../../test:test_support", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } + + unittest_resources = [ + "../../../../resources/audio_processing/agc2/rnn_vad/band_energies.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/pitch_buf_24k.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/pitch_lp_res.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/pitch_search_int.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/samples.pcm", + "../../../../resources/audio_processing/agc2/rnn_vad/vad_prob.dat", + ] + + if (is_ios) { + bundle_data("unittests_bundle_data") { + testonly = true + sources = unittest_resources + outputs = [ "{{bundle_resources_dir}}/{{source_file_part}}" ] + } + } + + rtc_library("unittests") { + testonly = true + sources = [ + "auto_correlation_unittest.cc", + "features_extraction_unittest.cc", + "lp_residual_unittest.cc", + "pitch_search_internal_unittest.cc", + "pitch_search_unittest.cc", + "ring_buffer_unittest.cc", + "rnn_fc_unittest.cc", + "rnn_gru_unittest.cc", + "rnn_unittest.cc", + "rnn_vad_unittest.cc", + "sequence_buffer_unittest.cc", + "spectral_features_internal_unittest.cc", + "spectral_features_unittest.cc", + "symmetric_matrix_buffer_unittest.cc", + "vector_math_unittest.cc", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad", + ":rnn_vad_auto_correlation", + ":rnn_vad_common", + ":rnn_vad_layers", + ":rnn_vad_lp_residual", + ":rnn_vad_pitch", + ":rnn_vad_ring_buffer", + ":rnn_vad_sequence_buffer", + ":rnn_vad_spectral_features", + ":rnn_vad_symmetric_matrix_buffer", + ":test_utils", + ":vector_math", + "..:cpu_features", + "../..:audioproc_test_utils", + "../../../../api:array_view", + "../../../../common_audio/", + "../../../../rtc_base:checks", + "../../../../rtc_base:logging", + "../../../../rtc_base:safe_compare", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base:stringutils", + "../../../../rtc_base/system:arch", + "../../../../test:test_support", + "../../utility:pffft_wrapper", + "//third_party/rnnoise:rnn_vad", + ] + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":vector_math_avx2" ] + } + absl_deps = [ "//third_party/abseil-cpp/absl/memory" ] + data = unittest_resources + if (is_ios) { + deps += [ ":unittests_bundle_data" ] + } + } + + if (!build_with_chromium) { + rtc_executable("rnn_vad_tool") { + testonly = true + sources = [ "rnn_vad_tool.cc" ] + deps = [ + ":rnn_vad", + ":rnn_vad_common", + "..:cpu_features", + "../../../../api:array_view", + "../../../../common_audio", + "../../../../rtc_base:logging", + "../../../../rtc_base:safe_compare", + "../../../../test:test_support", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS new file mode 100644 index 0000000000..773c2d7edd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+third_party/rnnoise", +] diff --git 
a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc
new file mode 100644
index 0000000000..3ddeec8dba
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/auto_correlation.h"
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+constexpr int kAutoCorrelationFftOrder = 9;  // Length-512 FFT.
+static_assert(1 << kAutoCorrelationFftOrder >
+                  kNumLags12kHz + kBufSize12kHz - kMaxPitch12kHz,
+              "");
+
+}  // namespace
+
+AutoCorrelationCalculator::AutoCorrelationCalculator()
+    : fft_(1 << kAutoCorrelationFftOrder, Pffft::FftType::kReal),
+      tmp_(fft_.CreateBuffer()),
+      X_(fft_.CreateBuffer()),
+      H_(fft_.CreateBuffer()) {}
+
+AutoCorrelationCalculator::~AutoCorrelationCalculator() = default;
+
+// The auto-correlation coefficients are computed as follows:
+// |.........|...........|  <- pitch buffer
+//           [ x (fixed) ]
+// [ y_0 ]
+//         [ y_{m-1} ]
+// x and y are sub-arrays of equal length; x is never moved, whereas y slides.
+// The cross-correlation between y_0 and x corresponds to the auto-correlation
+// for the maximum pitch period. Hence, the first value in `auto_corr` has an
+// inverted lag equal to 0 that corresponds to a lag equal to the maximum
+// pitch period.
+void AutoCorrelationCalculator::ComputeOnPitchBuffer(
+    rtc::ArrayView<const float, kBufSize12kHz> pitch_buf,
+    rtc::ArrayView<float, kNumLags12kHz> auto_corr) {
+  RTC_DCHECK_LT(auto_corr.size(), kMaxPitch12kHz);
+  RTC_DCHECK_GT(pitch_buf.size(), kMaxPitch12kHz);
+  constexpr int kFftFrameSize = 1 << kAutoCorrelationFftOrder;
+  constexpr int kConvolutionLength = kBufSize12kHz - kMaxPitch12kHz;
+  static_assert(kConvolutionLength == kFrameSize20ms12kHz,
+                "Mismatch between pitch buffer size, frame size and maximum "
+                "pitch period.");
+  static_assert(kFftFrameSize > kNumLags12kHz + kConvolutionLength,
+                "The FFT length is not sufficiently big to avoid cyclic "
+                "convolution errors.");
+  auto tmp = tmp_->GetView();
+
+  // Compute the FFT for the reversed reference frame - i.e.,
+  // pitch_buf[-kConvolutionLength:].
+  std::reverse_copy(pitch_buf.end() - kConvolutionLength, pitch_buf.end(),
+                    tmp.begin());
+  std::fill(tmp.begin() + kConvolutionLength, tmp.end(), 0.f);
+  fft_.ForwardTransform(*tmp_, H_.get(), /*ordered=*/false);
+
+  // Compute the FFT for the sliding frames chunk. The sliding frames are
+  // defined as pitch_buf[i:i+kConvolutionLength] where i in
+  // [0, kNumLags12kHz). The chunk includes all of them, hence it is
+  // defined as pitch_buf[:kNumLags12kHz+kConvolutionLength].
+  std::copy(pitch_buf.begin(),
+            pitch_buf.begin() + kConvolutionLength + kNumLags12kHz,
+            tmp.begin());
+  std::fill(tmp.begin() + kNumLags12kHz + kConvolutionLength, tmp.end(), 0.f);
+  fft_.ForwardTransform(*tmp_, X_.get(), /*ordered=*/false);
+
+  // Convolve in the frequency domain.
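+  // Note: multiplying the spectrum of the sliding chunk by the spectrum of
+  // the time-reversed reference frame computes all the cross-correlations in
+  // one pass, since correlation equals convolution with a reversed kernel.
+  // PFFFT transforms are unscaled, hence the 1/kFftFrameSize factor below.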
+  constexpr float kScalingFactor = 1.f / static_cast<float>(kFftFrameSize);
+  std::fill(tmp.begin(), tmp.end(), 0.f);
+  fft_.FrequencyDomainConvolve(*X_, *H_, tmp_.get(), kScalingFactor);
+  fft_.BackwardTransform(*tmp_, tmp_.get(), /*ordered=*/false);
+
+  // Extract the auto-correlation coefficients.
+  std::copy(tmp.begin() + kConvolutionLength - 1,
+            tmp.begin() + kConvolutionLength + kNumLags12kHz - 1,
+            auto_corr.begin());
+}
+
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h
new file mode 100644
index 0000000000..1ae5054567
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h
@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_AUTO_CORRELATION_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_AUTO_CORRELATION_H_
+
+#include <memory>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/rnn_vad/common.h"
+#include "modules/audio_processing/utility/pffft_wrapper.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Class to compute the auto correlation on the pitch buffer for a target pitch
+// interval.
+class AutoCorrelationCalculator {
+ public:
+  AutoCorrelationCalculator();
+  AutoCorrelationCalculator(const AutoCorrelationCalculator&) = delete;
+  AutoCorrelationCalculator& operator=(const AutoCorrelationCalculator&) =
+      delete;
+  ~AutoCorrelationCalculator();
+
+  // Computes the auto-correlation coefficients for a target pitch interval.
+  // `auto_corr` indexes are inverted lags.
+  void ComputeOnPitchBuffer(
+      rtc::ArrayView<const float, kBufSize12kHz> pitch_buf,
+      rtc::ArrayView<float, kNumLags12kHz> auto_corr);
+
+ private:
+  Pffft fft_;
+  std::unique_ptr<Pffft::FloatBuffer> tmp_;
+  std::unique_ptr<Pffft::FloatBuffer> X_;
+  std::unique_ptr<Pffft::FloatBuffer> H_;
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_AUTO_CORRELATION_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc
new file mode 100644
index 0000000000..76001ed7b7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/auto_correlation.h"
+
+#include "modules/audio_processing/agc2/rnn_vad/common.h"
+#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+// Checks that the auto correlation function produces output within tolerance
+// given test input data.
+TEST(RnnVadTest, PitchBufferAutoCorrelationWithinTolerance) {
+  PitchTestData test_data;
+  std::array<float, kBufSize12kHz> pitch_buf_decimated;
+  Decimate2x(test_data.PitchBuffer24kHzView(), pitch_buf_decimated);
+  std::array<float, kNumLags12kHz> computed_output;
+  {
+    // TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+    // FloatingPointExceptionObserver fpe_observer;
+    AutoCorrelationCalculator auto_corr_calculator;
+    auto_corr_calculator.ComputeOnPitchBuffer(pitch_buf_decimated,
+                                              computed_output);
+  }
+  auto auto_corr_view = test_data.AutoCorrelation12kHzView();
+  ExpectNearAbsolute({auto_corr_view.data(), auto_corr_view.size()},
+                     computed_output, 3e-3f);
+}
+
+// Checks that the auto correlation function computes the right thing for a
+// simple use case.
+TEST(RnnVadTest, CheckAutoCorrelationOnConstantPitchBuffer) {
+  // Create constant signal with no pitch.
+  std::array<float, kBufSize12kHz> pitch_buf_decimated;
+  std::fill(pitch_buf_decimated.begin(), pitch_buf_decimated.end(), 1.f);
+  std::array<float, kNumLags12kHz> computed_output;
+  {
+    // TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+    // FloatingPointExceptionObserver fpe_observer;
+    AutoCorrelationCalculator auto_corr_calculator;
+    auto_corr_calculator.ComputeOnPitchBuffer(pitch_buf_decimated,
+                                              computed_output);
+  }
+  // The expected output is a vector filled with the same expected
+  // auto-correlation value. The latter equals the length of a 20 ms frame.
+  constexpr int kFrameSize20ms12kHz = kFrameSize20ms24kHz / 2;
+  std::array<float, kNumLags12kHz> expected_output;
+  std::fill(expected_output.begin(), expected_output.end(),
+            static_cast<float>(kFrameSize20ms12kHz));
+  ExpectNearAbsolute(expected_output, computed_output, 4e-5f);
+}
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h
new file mode 100644
index 0000000000..c099373200
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_
+
+#include <stddef.h>
+
+namespace webrtc {
+namespace rnn_vad {
+
+constexpr double kPi = 3.14159265358979323846;
+
+constexpr int kSampleRate24kHz = 24000;
+constexpr int kFrameSize10ms24kHz = kSampleRate24kHz / 100;
+constexpr int kFrameSize20ms24kHz = kFrameSize10ms24kHz * 2;
+
+// Pitch buffer.
+constexpr int kMinPitch24kHz = kSampleRate24kHz / 800;   // 0.00125 s.
+constexpr int kMaxPitch24kHz = kSampleRate24kHz / 62.5;  // 0.016 s.
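+// For reference: at 24 kHz the two periods above evaluate to 30 samples
+// (24000 / 800, i.e., a maximum pitch frequency of 800 Hz) and 384 samples
+// (24000 / 62.5, i.e., a minimum pitch frequency of 62.5 Hz).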
+constexpr int kBufSize24kHz = kMaxPitch24kHz + kFrameSize20ms24kHz; +static_assert((kBufSize24kHz & 1) == 0, "The buffer size must be even."); + +// 24 kHz analysis. +// Define a higher minimum pitch period for the initial search. This is used to +// avoid searching for very short periods, for which a refinement step is +// responsible. +constexpr int kInitialMinPitch24kHz = 3 * kMinPitch24kHz; +static_assert(kMinPitch24kHz < kInitialMinPitch24kHz, ""); +static_assert(kInitialMinPitch24kHz < kMaxPitch24kHz, ""); +static_assert(kMaxPitch24kHz > kInitialMinPitch24kHz, ""); +// Number of (inverted) lags during the initial pitch search phase at 24 kHz. +constexpr int kInitialNumLags24kHz = kMaxPitch24kHz - kInitialMinPitch24kHz; +// Number of (inverted) lags during the pitch search refinement phase at 24 kHz. +constexpr int kRefineNumLags24kHz = kMaxPitch24kHz + 1; +static_assert( + kRefineNumLags24kHz > kInitialNumLags24kHz, + "The refinement step must search the pitch in an extended pitch range."); + +// 12 kHz analysis. +constexpr int kSampleRate12kHz = 12000; +constexpr int kFrameSize10ms12kHz = kSampleRate12kHz / 100; +constexpr int kFrameSize20ms12kHz = kFrameSize10ms12kHz * 2; +constexpr int kBufSize12kHz = kBufSize24kHz / 2; +constexpr int kInitialMinPitch12kHz = kInitialMinPitch24kHz / 2; +constexpr int kMaxPitch12kHz = kMaxPitch24kHz / 2; +static_assert(kMaxPitch12kHz > kInitialMinPitch12kHz, ""); +// The inverted lags for the pitch interval [`kInitialMinPitch12kHz`, +// `kMaxPitch12kHz`] are in the range [0, `kNumLags12kHz`]. +constexpr int kNumLags12kHz = kMaxPitch12kHz - kInitialMinPitch12kHz; + +// 48 kHz constants. +constexpr int kMinPitch48kHz = kMinPitch24kHz * 2; +constexpr int kMaxPitch48kHz = kMaxPitch24kHz * 2; + +// Spectral features. +constexpr int kNumBands = 22; +constexpr int kNumLowerBands = 6; +static_assert((0 < kNumLowerBands) && (kNumLowerBands < kNumBands), ""); +constexpr int kCepstralCoeffsHistorySize = 8; +static_assert(kCepstralCoeffsHistorySize > 2, + "The history size must at least be 3 to compute first and second " + "derivatives."); + +constexpr int kFeatureVectorSize = 42; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc new file mode 100644 index 0000000000..502023428d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" + +#include + +#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Computed as `scipy.signal.butter(N=2, Wn=60/24000, btype='highpass')`. 
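+// The two brace-initialized lists below are the numerator {b0, b1, b2} and
+// the denominator {a1, a2} coefficients of the biquad, with a0 normalized to
+// 1 and omitted (see `BiQuadFilter::Config` in agc2/biquad_filter.h).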
+constexpr BiQuadFilter::Config kHpfConfig24k{ + {0.99446179f, -1.98892358f, 0.99446179f}, + {-1.98889291f, 0.98895425f}}; + +} // namespace + +FeaturesExtractor::FeaturesExtractor(const AvailableCpuFeatures& cpu_features) + : use_high_pass_filter_(false), + hpf_(kHpfConfig24k), + pitch_buf_24kHz_(), + pitch_buf_24kHz_view_(pitch_buf_24kHz_.GetBufferView()), + lp_residual_(kBufSize24kHz), + lp_residual_view_(lp_residual_.data(), kBufSize24kHz), + pitch_estimator_(cpu_features), + reference_frame_view_(pitch_buf_24kHz_.GetMostRecentValuesView()) { + RTC_DCHECK_EQ(kBufSize24kHz, lp_residual_.size()); + Reset(); +} + +FeaturesExtractor::~FeaturesExtractor() = default; + +void FeaturesExtractor::Reset() { + pitch_buf_24kHz_.Reset(); + spectral_features_extractor_.Reset(); + if (use_high_pass_filter_) { + hpf_.Reset(); + } +} + +bool FeaturesExtractor::CheckSilenceComputeFeatures( + rtc::ArrayView samples, + rtc::ArrayView feature_vector) { + // Pre-processing. + if (use_high_pass_filter_) { + std::array samples_filtered; + hpf_.Process(samples, samples_filtered); + // Feed buffer with the pre-processed version of `samples`. + pitch_buf_24kHz_.Push(samples_filtered); + } else { + // Feed buffer with `samples`. + pitch_buf_24kHz_.Push(samples); + } + // Extract the LP residual. + float lpc_coeffs[kNumLpcCoefficients]; + ComputeAndPostProcessLpcCoefficients(pitch_buf_24kHz_view_, lpc_coeffs); + ComputeLpResidual(lpc_coeffs, pitch_buf_24kHz_view_, lp_residual_view_); + // Estimate pitch on the LP-residual and write the normalized pitch period + // into the output vector (normalization based on training data stats). + pitch_period_48kHz_ = pitch_estimator_.Estimate(lp_residual_view_); + feature_vector[kFeatureVectorSize - 2] = 0.01f * (pitch_period_48kHz_ - 300); + // Extract lagged frames (according to the estimated pitch period). + RTC_DCHECK_LE(pitch_period_48kHz_ / 2, kMaxPitch24kHz); + auto lagged_frame = pitch_buf_24kHz_view_.subview( + kMaxPitch24kHz - pitch_period_48kHz_ / 2, kFrameSize20ms24kHz); + // Analyze reference and lagged frames checking if silence has been detected + // and write the feature vector. + return spectral_features_extractor_.CheckSilenceComputeFeatures( + reference_frame_view_, {lagged_frame.data(), kFrameSize20ms24kHz}, + {feature_vector.data() + kNumLowerBands, kNumBands - kNumLowerBands}, + {feature_vector.data(), kNumLowerBands}, + {feature_vector.data() + kNumBands, kNumLowerBands}, + {feature_vector.data() + kNumBands + kNumLowerBands, kNumLowerBands}, + {feature_vector.data() + kNumBands + 2 * kNumLowerBands, kNumLowerBands}, + &feature_vector[kFeatureVectorSize - 1]); +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h new file mode 100644 index 0000000000..d47a85bfb0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_ + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/biquad_filter.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" +#include "modules/audio_processing/agc2/rnn_vad/sequence_buffer.h" +#include "modules/audio_processing/agc2/rnn_vad/spectral_features.h" + +namespace webrtc { +namespace rnn_vad { + +// Feature extractor to feed the VAD RNN. +class FeaturesExtractor { + public: + explicit FeaturesExtractor(const AvailableCpuFeatures& cpu_features); + FeaturesExtractor(const FeaturesExtractor&) = delete; + FeaturesExtractor& operator=(const FeaturesExtractor&) = delete; + ~FeaturesExtractor(); + void Reset(); + // Analyzes the samples, computes the feature vector and returns true if + // silence is detected (false if not). When silence is detected, + // `feature_vector` is partially written and therefore must not be used to + // feed the VAD RNN. + bool CheckSilenceComputeFeatures( + rtc::ArrayView samples, + rtc::ArrayView feature_vector); + + private: + const bool use_high_pass_filter_; + // TODO(bugs.webrtc.org/7494): Remove HPF depending on how AGC2 is used in APM + // and on whether an HPF is already used as pre-processing step in APM. + BiQuadFilter hpf_; + SequenceBuffer + pitch_buf_24kHz_; + rtc::ArrayView pitch_buf_24kHz_view_; + std::vector lp_residual_; + rtc::ArrayView lp_residual_view_; + PitchEstimator pitch_estimator_; + rtc::ArrayView reference_frame_view_; + SpectralFeaturesExtractor spectral_features_extractor_; + int pitch_period_48kHz_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc new file mode 100644 index 0000000000..96f956adfe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" + +#include +#include + +#include "modules/audio_processing/agc2/cpu_features.h" +#include "rtc_base/numerics/safe_compare.h" +#include "rtc_base/numerics/safe_conversions.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int ceil(int n, int m) { + return (n + m - 1) / m; +} + +// Number of 10 ms frames required to fill a pitch buffer having size +// `kBufSize24kHz`. +constexpr int kNumTestDataFrames = ceil(kBufSize24kHz, kFrameSize10ms24kHz); +// Number of samples for the test data. +constexpr int kNumTestDataSize = kNumTestDataFrames * kFrameSize10ms24kHz; + +// Verifies that the pitch in Hz is in the detectable range. 
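+// E.g., at 24 kHz a 150 Hz tone has a period of 24000 / 150 = 160 samples,
+// which lies within [kInitialMinPitch24kHz, kMaxPitch24kHz] = [90, 384].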
+bool PitchIsValid(float pitch_hz) { + const int pitch_period = static_cast(kSampleRate24kHz) / pitch_hz; + return kInitialMinPitch24kHz <= pitch_period && + pitch_period <= kMaxPitch24kHz; +} + +void CreatePureTone(float amplitude, float freq_hz, rtc::ArrayView dst) { + for (int i = 0; rtc::SafeLt(i, dst.size()); ++i) { + dst[i] = amplitude * std::sin(2.f * kPi * freq_hz * i / kSampleRate24kHz); + } +} + +// Feeds `features_extractor` with `samples` splitting it in 10 ms frames. +// For every frame, the output is written into `feature_vector`. Returns true +// if silence is detected in the last frame. +bool FeedTestData(FeaturesExtractor& features_extractor, + rtc::ArrayView samples, + rtc::ArrayView feature_vector) { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + bool is_silence = true; + const int num_frames = samples.size() / kFrameSize10ms24kHz; + for (int i = 0; i < num_frames; ++i) { + is_silence = features_extractor.CheckSilenceComputeFeatures( + {samples.data() + i * kFrameSize10ms24kHz, kFrameSize10ms24kHz}, + feature_vector); + } + return is_silence; +} + +// Extracts the features for two pure tones and verifies that the pitch field +// values reflect the known tone frequencies. +TEST(RnnVadTest, FeatureExtractionLowHighPitch) { + constexpr float amplitude = 1000.f; + constexpr float low_pitch_hz = 150.f; + constexpr float high_pitch_hz = 250.f; + ASSERT_TRUE(PitchIsValid(low_pitch_hz)); + ASSERT_TRUE(PitchIsValid(high_pitch_hz)); + + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + FeaturesExtractor features_extractor(cpu_features); + std::vector samples(kNumTestDataSize); + std::vector feature_vector(kFeatureVectorSize); + ASSERT_EQ(kFeatureVectorSize, rtc::dchecked_cast(feature_vector.size())); + rtc::ArrayView feature_vector_view( + feature_vector.data(), kFeatureVectorSize); + + // Extract the normalized scalar feature that is proportional to the estimated + // pitch period. + constexpr int pitch_feature_index = kFeatureVectorSize - 2; + // Low frequency tone - i.e., high period. + CreatePureTone(amplitude, low_pitch_hz, samples); + ASSERT_FALSE(FeedTestData(features_extractor, samples, feature_vector_view)); + float high_pitch_period = feature_vector_view[pitch_feature_index]; + // High frequency tone - i.e., low period. + features_extractor.Reset(); + CreatePureTone(amplitude, high_pitch_hz, samples); + ASSERT_FALSE(FeedTestData(features_extractor, samples, feature_vector_view)); + float low_pitch_period = feature_vector_view[pitch_feature_index]; + // Check. + EXPECT_LT(low_pitch_period, high_pitch_period); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc new file mode 100644 index 0000000000..484bfba459 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <numeric>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+// Computes auto-correlation coefficients for `x` and writes them in
+// `auto_corr`. The lag values are in {0, ..., max_lag - 1}, where max_lag
+// equals the size of `auto_corr`.
+void ComputeAutoCorrelation(
+    rtc::ArrayView<const float> x,
+    rtc::ArrayView<float, kNumLpcCoefficients> auto_corr) {
+  constexpr int max_lag = auto_corr.size();
+  RTC_DCHECK_LT(max_lag, x.size());
+  for (int lag = 0; lag < max_lag; ++lag) {
+    auto_corr[lag] =
+        std::inner_product(x.begin(), x.end() - lag, x.begin() + lag, 0.f);
+  }
+}
+
+// Applies denoising to the auto-correlation coefficients.
+void DenoiseAutoCorrelation(
+    rtc::ArrayView<float, kNumLpcCoefficients> auto_corr) {
+  // Assume -40 dB white noise floor.
+  auto_corr[0] *= 1.0001f;
+  // Hard-coded values obtained as
+  // [np.float32((0.008*0.008*i*i)) for i in range(1,5)].
+  auto_corr[1] -= auto_corr[1] * 0.000064f;
+  auto_corr[2] -= auto_corr[2] * 0.000256f;
+  auto_corr[3] -= auto_corr[3] * 0.000576f;
+  auto_corr[4] -= auto_corr[4] * 0.001024f;
+  static_assert(kNumLpcCoefficients == 5, "Update `auto_corr`.");
+}
+
+// Computes the initial inverse filter coefficients given the auto-correlation
+// coefficients of an input frame.
+void ComputeInitialInverseFilterCoefficients(
+    rtc::ArrayView<const float, kNumLpcCoefficients> auto_corr,
+    rtc::ArrayView<float, kNumLpcCoefficients - 1> lpc_coeffs) {
+  float error = auto_corr[0];
+  for (int i = 0; i < kNumLpcCoefficients - 1; ++i) {
+    float reflection_coeff = 0.f;
+    for (int j = 0; j < i; ++j) {
+      reflection_coeff += lpc_coeffs[j] * auto_corr[i - j];
+    }
+    reflection_coeff += auto_corr[i + 1];
+
+    // Avoid division by numbers close to zero.
+    constexpr float kMinErrorMagnitude = 1e-6f;
+    if (std::fabs(error) < kMinErrorMagnitude) {
+      error = std::copysign(kMinErrorMagnitude, error);
+    }
+
+    reflection_coeff /= -error;
+    // Update LPC coefficients and total error.
+    lpc_coeffs[i] = reflection_coeff;
+    for (int j = 0; j < ((i + 1) >> 1); ++j) {
+      const float tmp1 = lpc_coeffs[j];
+      const float tmp2 = lpc_coeffs[i - 1 - j];
+      lpc_coeffs[j] = tmp1 + reflection_coeff * tmp2;
+      lpc_coeffs[i - 1 - j] = tmp2 + reflection_coeff * tmp1;
+    }
+    error -= reflection_coeff * reflection_coeff * error;
+    if (error < 0.001f * auto_corr[0]) {
+      break;
+    }
+  }
+}
+
+}  // namespace
+
+void ComputeAndPostProcessLpcCoefficients(
+    rtc::ArrayView<const float> x,
+    rtc::ArrayView<float, kNumLpcCoefficients> lpc_coeffs) {
+  std::array<float, kNumLpcCoefficients> auto_corr;
+  ComputeAutoCorrelation(x, auto_corr);
+  if (auto_corr[0] == 0.f) {  // Empty frame.
+    std::fill(lpc_coeffs.begin(), lpc_coeffs.end(), 0);
+    return;
+  }
+  DenoiseAutoCorrelation(auto_corr);
+  std::array<float, kNumLpcCoefficients - 1> lpc_coeffs_pre{};
+  ComputeInitialInverseFilterCoefficients(auto_corr, lpc_coeffs_pre);
+  // LPC coefficients post-processing.
+  // TODO(bugs.webrtc.org/9076): Consider removing these steps.
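+  // The scaling below (often called bandwidth expansion) multiplies the k-th
+  // coefficient by 0.9^k, which widens the spectral peaks of the LP fit. The
+  // step after that is equivalent to convolving the inverse filter
+  // {1, c0, c1, c2, c3} with the two-tap kernel {1, kC}.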
+ lpc_coeffs_pre[0] *= 0.9f; + lpc_coeffs_pre[1] *= 0.9f * 0.9f; + lpc_coeffs_pre[2] *= 0.9f * 0.9f * 0.9f; + lpc_coeffs_pre[3] *= 0.9f * 0.9f * 0.9f * 0.9f; + constexpr float kC = 0.8f; + lpc_coeffs[0] = lpc_coeffs_pre[0] + kC; + lpc_coeffs[1] = lpc_coeffs_pre[1] + kC * lpc_coeffs_pre[0]; + lpc_coeffs[2] = lpc_coeffs_pre[2] + kC * lpc_coeffs_pre[1]; + lpc_coeffs[3] = lpc_coeffs_pre[3] + kC * lpc_coeffs_pre[2]; + lpc_coeffs[4] = kC * lpc_coeffs_pre[3]; + static_assert(kNumLpcCoefficients == 5, "Update `lpc_coeffs(_pre)`."); +} + +void ComputeLpResidual( + rtc::ArrayView lpc_coeffs, + rtc::ArrayView x, + rtc::ArrayView y) { + RTC_DCHECK_GT(x.size(), kNumLpcCoefficients); + RTC_DCHECK_EQ(x.size(), y.size()); + // The code below implements the following operation: + // y[i] = x[i] + dot_product({x[i], ..., x[i - kNumLpcCoefficients + 1]}, + // lpc_coeffs) + // Edge case: i < kNumLpcCoefficients. + y[0] = x[0]; + for (int i = 1; i < kNumLpcCoefficients; ++i) { + y[i] = + std::inner_product(x.crend() - i, x.crend(), lpc_coeffs.cbegin(), x[i]); + } + // Regular case. + auto last = x.crend(); + for (int i = kNumLpcCoefficients; rtc::SafeLt(i, y.size()); ++i, --last) { + y[i] = std::inner_product(last - kNumLpcCoefficients, last, + lpc_coeffs.cbegin(), x[i]); + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h new file mode 100644 index 0000000000..d04c536ec1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_LP_RESIDUAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_LP_RESIDUAL_H_ + +#include + +#include "api/array_view.h" + +namespace webrtc { +namespace rnn_vad { + +// Linear predictive coding (LPC) inverse filter length. +constexpr int kNumLpcCoefficients = 5; + +// Given a frame `x`, computes a post-processed version of LPC coefficients +// tailored for pitch estimation. +void ComputeAndPostProcessLpcCoefficients( + rtc::ArrayView x, + rtc::ArrayView lpc_coeffs); + +// Computes the LP residual for the input frame `x` and the LPC coefficients +// `lpc_coeffs`. `y` and `x` can point to the same array for in-place +// computation. +void ComputeLpResidual( + rtc::ArrayView lpc_coeffs, + rtc::ArrayView x, + rtc::ArrayView y); + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_LP_RESIDUAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc new file mode 100644 index 0000000000..7b3a4a3f65 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h" + +#include +#include +#include + +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Checks that the LP residual can be computed on an empty frame. +TEST(RnnVadTest, LpResidualOfEmptyFrame) { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + + // Input frame (empty, i.e., all samples set to 0). + std::array empty_frame; + empty_frame.fill(0.f); + // Compute inverse filter coefficients. + std::array lpc; + ComputeAndPostProcessLpcCoefficients(empty_frame, lpc); + // Compute LP residual. + std::array lp_residual; + ComputeLpResidual(lpc, empty_frame, lp_residual); +} + +// Checks that the computed LP residual is bit-exact given test input data. +TEST(RnnVadTest, LpResidualPipelineBitExactness) { + // Input and expected output readers. + ChunksFileReader pitch_buffer_reader = CreatePitchBuffer24kHzReader(); + ChunksFileReader lp_pitch_reader = CreateLpResidualAndPitchInfoReader(); + + // Buffers. + std::vector pitch_buffer_24kHz(kBufSize24kHz); + std::array lpc; + std::vector computed_lp_residual(kBufSize24kHz); + std::vector expected_lp_residual(kBufSize24kHz); + + // Test length. + const int num_frames = + std::min(pitch_buffer_reader.num_chunks, 300); // Max 3 s. + ASSERT_GE(lp_pitch_reader.num_chunks, num_frames); + + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + for (int i = 0; i < num_frames; ++i) { + SCOPED_TRACE(i); + // Read input. + ASSERT_TRUE(pitch_buffer_reader.reader->ReadChunk(pitch_buffer_24kHz)); + // Read expected output (ignore pitch gain and period). + ASSERT_TRUE(lp_pitch_reader.reader->ReadChunk(expected_lp_residual)); + lp_pitch_reader.reader->SeekForward(2); // Pitch period and strength. + // Check every 200 ms. + if (i % 20 == 0) { + ComputeAndPostProcessLpcCoefficients(pitch_buffer_24kHz, lpc); + ComputeLpResidual(lpc, pitch_buffer_24kHz, computed_lp_residual); + ExpectNearAbsolute(expected_lp_residual, computed_lp_residual, kFloatMin); + } + } +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc new file mode 100644 index 0000000000..419620fc0c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" + +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace rnn_vad { + +PitchEstimator::PitchEstimator(const AvailableCpuFeatures& cpu_features) + : cpu_features_(cpu_features), + y_energy_24kHz_(kRefineNumLags24kHz, 0.f), + pitch_buffer_12kHz_(kBufSize12kHz), + auto_correlation_12kHz_(kNumLags12kHz) {} + +PitchEstimator::~PitchEstimator() = default; + +int PitchEstimator::Estimate( + rtc::ArrayView pitch_buffer) { + rtc::ArrayView pitch_buffer_12kHz_view( + pitch_buffer_12kHz_.data(), kBufSize12kHz); + RTC_DCHECK_EQ(pitch_buffer_12kHz_.size(), pitch_buffer_12kHz_view.size()); + rtc::ArrayView auto_correlation_12kHz_view( + auto_correlation_12kHz_.data(), kNumLags12kHz); + RTC_DCHECK_EQ(auto_correlation_12kHz_.size(), + auto_correlation_12kHz_view.size()); + + // TODO(bugs.chromium.org/10480): Use `cpu_features_` to estimate pitch. + // Perform the initial pitch search at 12 kHz. + Decimate2x(pitch_buffer, pitch_buffer_12kHz_view); + auto_corr_calculator_.ComputeOnPitchBuffer(pitch_buffer_12kHz_view, + auto_correlation_12kHz_view); + CandidatePitchPeriods pitch_periods = ComputePitchPeriod12kHz( + pitch_buffer_12kHz_view, auto_correlation_12kHz_view, cpu_features_); + // The refinement is done using the pitch buffer that contains 24 kHz samples. + // Therefore, adapt the inverted lags in `pitch_candidates_inv_lags` from 12 + // to 24 kHz. + pitch_periods.best *= 2; + pitch_periods.second_best *= 2; + + // Refine the initial pitch period estimation from 12 kHz to 48 kHz. + // Pre-compute frame energies at 24 kHz. + rtc::ArrayView y_energy_24kHz_view( + y_energy_24kHz_.data(), kRefineNumLags24kHz); + RTC_DCHECK_EQ(y_energy_24kHz_.size(), y_energy_24kHz_view.size()); + ComputeSlidingFrameSquareEnergies24kHz(pitch_buffer, y_energy_24kHz_view, + cpu_features_); + // Estimation at 48 kHz. + const int pitch_lag_48kHz = ComputePitchPeriod48kHz( + pitch_buffer, y_energy_24kHz_view, pitch_periods, cpu_features_); + last_pitch_48kHz_ = ComputeExtendedPitchPeriod48kHz( + pitch_buffer, y_energy_24kHz_view, + /*initial_pitch_period_48kHz=*/kMaxPitch48kHz - pitch_lag_48kHz, + last_pitch_48kHz_, cpu_features_); + return last_pitch_48kHz_.period; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h new file mode 100644 index 0000000000..42c448eb56 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/auto_correlation.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" +#include "rtc_base/gtest_prod_util.h" + +namespace webrtc { +namespace rnn_vad { + +// Pitch estimator. +class PitchEstimator { + public: + explicit PitchEstimator(const AvailableCpuFeatures& cpu_features); + PitchEstimator(const PitchEstimator&) = delete; + PitchEstimator& operator=(const PitchEstimator&) = delete; + ~PitchEstimator(); + // Returns the estimated pitch period at 48 kHz. + int Estimate(rtc::ArrayView pitch_buffer); + + private: + FRIEND_TEST_ALL_PREFIXES(RnnVadTest, PitchSearchWithinTolerance); + float GetLastPitchStrengthForTesting() const { + return last_pitch_48kHz_.strength; + } + + const AvailableCpuFeatures cpu_features_; + PitchInfo last_pitch_48kHz_{}; + AutoCorrelationCalculator auto_corr_calculator_; + std::vector y_energy_24kHz_; + std::vector pitch_buffer_12kHz_; + std::vector auto_correlation_12kHz_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc new file mode 100644 index 0000000000..e8c912518d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc @@ -0,0 +1,513 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" + +#include + +#include +#include +#include +#include + +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/system/arch.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +float ComputeAutoCorrelation( + int inverted_lag, + rtc::ArrayView pitch_buffer, + const VectorMath& vector_math) { + RTC_DCHECK_LT(inverted_lag, kBufSize24kHz); + RTC_DCHECK_LT(inverted_lag, kRefineNumLags24kHz); + static_assert(kMaxPitch24kHz < kBufSize24kHz, ""); + return vector_math.DotProduct( + pitch_buffer.subview(/*offset=*/kMaxPitch24kHz), + pitch_buffer.subview(inverted_lag, kFrameSize20ms24kHz)); +} + +// Given an auto-correlation coefficient `curr_auto_correlation` and its +// neighboring values `prev_auto_correlation` and `next_auto_correlation` +// computes a pseudo-interpolation offset to be applied to the pitch period +// associated to `curr`. The output is a lag in {-1, 0, +1}. +// TODO(bugs.webrtc.org/9076): Consider removing this method. 
+// `GetPitchPseudoInterpolationOffset()` it is relevant only if the spectral +// analysis works at a sample rate that is twice as that of the pitch buffer; +// In particular, it is not relevant for the estimated pitch period feature fed +// into the RNN. +int GetPitchPseudoInterpolationOffset(float prev_auto_correlation, + float curr_auto_correlation, + float next_auto_correlation) { + if ((next_auto_correlation - prev_auto_correlation) > + 0.7f * (curr_auto_correlation - prev_auto_correlation)) { + return 1; // `next_auto_correlation` is the largest auto-correlation + // coefficient. + } else if ((prev_auto_correlation - next_auto_correlation) > + 0.7f * (curr_auto_correlation - next_auto_correlation)) { + return -1; // `prev_auto_correlation` is the largest auto-correlation + // coefficient. + } + return 0; +} + +// Refines a pitch period `lag` encoded as lag with pseudo-interpolation. The +// output sample rate is twice as that of `lag`. +int PitchPseudoInterpolationLagPitchBuf( + int lag, + rtc::ArrayView pitch_buffer, + const VectorMath& vector_math) { + int offset = 0; + // Cannot apply pseudo-interpolation at the boundaries. + if (lag > 0 && lag < kMaxPitch24kHz) { + const int inverted_lag = kMaxPitch24kHz - lag; + offset = GetPitchPseudoInterpolationOffset( + ComputeAutoCorrelation(inverted_lag + 1, pitch_buffer, vector_math), + ComputeAutoCorrelation(inverted_lag, pitch_buffer, vector_math), + ComputeAutoCorrelation(inverted_lag - 1, pitch_buffer, vector_math)); + } + return 2 * lag + offset; +} + +// Integer multipliers used in ComputeExtendedPitchPeriod48kHz() when +// looking for sub-harmonics. +// The values have been chosen to serve the following algorithm. Given the +// initial pitch period T, we examine whether one of its harmonics is the true +// fundamental frequency. We consider T/k with k in {2, ..., 15}. For each of +// these harmonics, in addition to the pitch strength of itself, we choose one +// multiple of its pitch period, n*T/k, to validate it (by averaging their pitch +// strengths). The multiplier n is chosen so that n*T/k is used only one time +// over all k. When for example k = 4, we should also expect a peak at 3*T/4. +// When k = 8 instead we don't want to look at 2*T/8, since we have already +// checked T/4 before. Instead, we look at T*3/8. +// The array can be generate in Python as follows: +// from fractions import Fraction +// # Smallest positive integer not in X. +// def mex(X): +// for i in range(1, int(max(X)+2)): +// if i not in X: +// return i +// # Visited multiples of the period. +// S = {1} +// for n in range(2, 16): +// sn = mex({n * i for i in S} | {1}) +// S = S | {Fraction(1, n), Fraction(sn, n)} +// print(sn, end=', ') +constexpr std::array kSubHarmonicMultipliers = { + {3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}}; + +struct Range { + int min; + int max; +}; + +// Number of analyzed pitches to the left(right) of a pitch candidate. +constexpr int kPitchNeighborhoodRadius = 2; + +// Creates a pitch period interval centered in `inverted_lag` with hard-coded +// radius. Clipping is applied so that the interval is always valid for a 24 kHz +// pitch buffer. +Range CreateInvertedLagRange(int inverted_lag) { + return {std::max(inverted_lag - kPitchNeighborhoodRadius, 0), + std::min(inverted_lag + kPitchNeighborhoodRadius, + kInitialNumLags24kHz - 1)}; +} + +constexpr int kNumPitchCandidates = 2; // Best and second best. +// Maximum number of analyzed pitch periods. 
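+// With `kNumPitchCandidates` == 2 and `kPitchNeighborhoodRadius` == 2 this
+// amounts to 2 * (2 * 2 + 1) = 10 inverted lags.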
+constexpr int kMaxPitchPeriods24kHz = + kNumPitchCandidates * (2 * kPitchNeighborhoodRadius + 1); + +// Collection of inverted lags. +class InvertedLagsIndex { + public: + InvertedLagsIndex() : num_entries_(0) {} + // Adds an inverted lag to the index. Cannot add more than + // `kMaxPitchPeriods24kHz` values. + void Append(int inverted_lag) { + RTC_DCHECK_LT(num_entries_, kMaxPitchPeriods24kHz); + inverted_lags_[num_entries_++] = inverted_lag; + } + const int* data() const { return inverted_lags_.data(); } + int size() const { return num_entries_; } + + private: + std::array inverted_lags_; + int num_entries_; +}; + +// Computes the auto correlation coefficients for the inverted lags in the +// closed interval `inverted_lags`. Updates `inverted_lags_index` by appending +// the inverted lags for the computed auto correlation values. +void ComputeAutoCorrelation( + Range inverted_lags, + rtc::ArrayView pitch_buffer, + rtc::ArrayView auto_correlation, + InvertedLagsIndex& inverted_lags_index, + const VectorMath& vector_math) { + // Check valid range. + RTC_DCHECK_LE(inverted_lags.min, inverted_lags.max); + // Trick to avoid zero initialization of `auto_correlation`. + // Needed by the pseudo-interpolation. + if (inverted_lags.min > 0) { + auto_correlation[inverted_lags.min - 1] = 0.f; + } + if (inverted_lags.max < kInitialNumLags24kHz - 1) { + auto_correlation[inverted_lags.max + 1] = 0.f; + } + // Check valid `inverted_lag` indexes. + RTC_DCHECK_GE(inverted_lags.min, 0); + RTC_DCHECK_LT(inverted_lags.max, kInitialNumLags24kHz); + for (int inverted_lag = inverted_lags.min; inverted_lag <= inverted_lags.max; + ++inverted_lag) { + auto_correlation[inverted_lag] = + ComputeAutoCorrelation(inverted_lag, pitch_buffer, vector_math); + inverted_lags_index.Append(inverted_lag); + } +} + +// Searches the strongest pitch period at 24 kHz and returns its inverted lag at +// 48 kHz. +int ComputePitchPeriod48kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView inverted_lags, + rtc::ArrayView auto_correlation, + rtc::ArrayView y_energy, + const VectorMath& vector_math) { + static_assert(kMaxPitch24kHz > kInitialNumLags24kHz, ""); + static_assert(kMaxPitch24kHz < kBufSize24kHz, ""); + int best_inverted_lag = 0; // Pitch period. + float best_numerator = -1.f; // Pitch strength numerator. + float best_denominator = 0.f; // Pitch strength denominator. + for (int inverted_lag : inverted_lags) { + // A pitch candidate must have positive correlation. + if (auto_correlation[inverted_lag] > 0.f) { + // Auto-correlation energy normalized by frame energy. + const float numerator = + auto_correlation[inverted_lag] * auto_correlation[inverted_lag]; + const float denominator = y_energy[inverted_lag]; + // Compare numerator/denominator ratios without using divisions. + if (numerator * best_denominator > best_numerator * denominator) { + best_inverted_lag = inverted_lag; + best_numerator = numerator; + best_denominator = denominator; + } + } + } + // Pseudo-interpolation to transform `best_inverted_lag` (24 kHz pitch) to a + // 48 kHz pitch period. + if (best_inverted_lag == 0 || best_inverted_lag >= kInitialNumLags24kHz - 1) { + // Cannot apply pseudo-interpolation at the boundaries. 
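+    // The offset computation requires both neighboring auto-correlation
+    // values, so at the boundaries the 24 kHz lag is simply doubled to obtain
+    // the 48 kHz pitch period.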
+ return best_inverted_lag * 2; + } + int offset = GetPitchPseudoInterpolationOffset( + auto_correlation[best_inverted_lag + 1], + auto_correlation[best_inverted_lag], + auto_correlation[best_inverted_lag - 1]); + // TODO(bugs.webrtc.org/9076): When retraining, check if `offset` below should + // be subtracted since `inverted_lag` is an inverted lag but offset is a lag. + return 2 * best_inverted_lag + offset; +} + +// Returns an alternative pitch period for `pitch_period` given a `multiplier` +// and a `divisor` of the period. +constexpr int GetAlternativePitchPeriod(int pitch_period, + int multiplier, + int divisor) { + RTC_DCHECK_GT(divisor, 0); + // Same as `round(multiplier * pitch_period / divisor)`. + return (2 * multiplier * pitch_period + divisor) / (2 * divisor); +} + +// Returns true if the alternative pitch period is stronger than the initial one +// given the last estimated pitch and the value of `period_divisor` used to +// compute the alternative pitch period via `GetAlternativePitchPeriod()`. +bool IsAlternativePitchStrongerThanInitial(PitchInfo last, + PitchInfo initial, + PitchInfo alternative, + int period_divisor) { + // Initial pitch period candidate thresholds for a sample rate of 24 kHz. + // Computed as [5*k*k for k in range(16)]. + constexpr std::array kInitialPitchPeriodThresholds = { + {20, 45, 80, 125, 180, 245, 320, 405, 500, 605, 720, 845, 980, 1125}}; + static_assert( + kInitialPitchPeriodThresholds.size() == kSubHarmonicMultipliers.size(), + ""); + RTC_DCHECK_GE(last.period, 0); + RTC_DCHECK_GE(initial.period, 0); + RTC_DCHECK_GE(alternative.period, 0); + RTC_DCHECK_GE(period_divisor, 2); + // Compute a term that lowers the threshold when `alternative.period` is close + // to the last estimated period `last.period` - i.e., pitch tracking. + float lower_threshold_term = 0.f; + if (std::abs(alternative.period - last.period) <= 1) { + // The candidate pitch period is within 1 sample from the last one. + // Make the candidate at `alternative.period` very easy to be accepted. + lower_threshold_term = last.strength; + } else if (std::abs(alternative.period - last.period) == 2 && + initial.period > + kInitialPitchPeriodThresholds[period_divisor - 2]) { + // The candidate pitch period is 2 samples far from the last one and the + // period `initial.period` (from which `alternative.period` has been + // derived) is greater than a threshold. Make `alternative.period` easy to + // be accepted. + lower_threshold_term = 0.5f * last.strength; + } + // Set the threshold based on the strength of the initial estimate + // `initial.period`. Also reduce the chance of false positives caused by a + // bias towards high frequencies (originating from short-term correlations). + float threshold = + std::max(0.3f, 0.7f * initial.strength - lower_threshold_term); + if (alternative.period < 3 * kMinPitch24kHz) { + // High frequency. + threshold = std::max(0.4f, 0.85f * initial.strength - lower_threshold_term); + } else if (alternative.period < 2 * kMinPitch24kHz) { + // Even higher frequency. + threshold = std::max(0.5f, 0.9f * initial.strength - lower_threshold_term); + } + return alternative.strength > threshold; +} + +} // namespace + +void Decimate2x(rtc::ArrayView src, + rtc::ArrayView dst) { + // TODO(bugs.webrtc.org/9076): Consider adding anti-aliasing filter. 
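+  // Keeping only the even-indexed samples folds any content above 6 kHz into
+  // the decimated signal. This is tolerable here because the 12 kHz buffer
+  // only drives the initial coarse pitch search, which is later refined on
+  // the full 24 kHz pitch buffer.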
+ static_assert(2 * kBufSize12kHz == kBufSize24kHz, ""); + for (int i = 0; i < kBufSize12kHz; ++i) { + dst[i] = src[2 * i]; + } +} + +void ComputeSlidingFrameSquareEnergies24kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView y_energy, + AvailableCpuFeatures cpu_features) { + VectorMath vector_math(cpu_features); + static_assert(kFrameSize20ms24kHz < kBufSize24kHz, ""); + const auto frame_20ms_view = pitch_buffer.subview(0, kFrameSize20ms24kHz); + float yy = vector_math.DotProduct(frame_20ms_view, frame_20ms_view); + y_energy[0] = yy; + static_assert(kMaxPitch24kHz - 1 + kFrameSize20ms24kHz < kBufSize24kHz, ""); + static_assert(kMaxPitch24kHz < kRefineNumLags24kHz, ""); + for (int inverted_lag = 0; inverted_lag < kMaxPitch24kHz; ++inverted_lag) { + yy -= pitch_buffer[inverted_lag] * pitch_buffer[inverted_lag]; + yy += pitch_buffer[inverted_lag + kFrameSize20ms24kHz] * + pitch_buffer[inverted_lag + kFrameSize20ms24kHz]; + yy = std::max(1.f, yy); + y_energy[inverted_lag + 1] = yy; + } +} + +CandidatePitchPeriods ComputePitchPeriod12kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView auto_correlation, + AvailableCpuFeatures cpu_features) { + static_assert(kMaxPitch12kHz > kNumLags12kHz, ""); + static_assert(kMaxPitch12kHz < kBufSize12kHz, ""); + + // Stores a pitch candidate period and strength information. + struct PitchCandidate { + // Pitch period encoded as inverted lag. + int period_inverted_lag = 0; + // Pitch strength encoded as a ratio. + float strength_numerator = -1.f; + float strength_denominator = 0.f; + // Compare the strength of two pitch candidates. + bool HasStrongerPitchThan(const PitchCandidate& b) const { + // Comparing the numerator/denominator ratios without using divisions. + return strength_numerator * b.strength_denominator > + b.strength_numerator * strength_denominator; + } + }; + + VectorMath vector_math(cpu_features); + static_assert(kFrameSize20ms12kHz + 1 < kBufSize12kHz, ""); + const auto frame_view = pitch_buffer.subview(0, kFrameSize20ms12kHz + 1); + float denominator = 1.f + vector_math.DotProduct(frame_view, frame_view); + // Search best and second best pitches by looking at the scaled + // auto-correlation. + PitchCandidate best; + PitchCandidate second_best; + second_best.period_inverted_lag = 1; + for (int inverted_lag = 0; inverted_lag < kNumLags12kHz; ++inverted_lag) { + // A pitch candidate must have positive correlation. + if (auto_correlation[inverted_lag] > 0.f) { + PitchCandidate candidate{ + inverted_lag, + auto_correlation[inverted_lag] * auto_correlation[inverted_lag], + denominator}; + if (candidate.HasStrongerPitchThan(second_best)) { + if (candidate.HasStrongerPitchThan(best)) { + second_best = best; + best = candidate; + } else { + second_best = candidate; + } + } + } + // Update `squared_energy_y` for the next inverted lag. + const float y_old = pitch_buffer[inverted_lag]; + const float y_new = pitch_buffer[inverted_lag + kFrameSize20ms12kHz]; + denominator -= y_old * y_old; + denominator += y_new * y_new; + denominator = std::max(0.f, denominator); + } + return {best.period_inverted_lag, second_best.period_inverted_lag}; +} + +int ComputePitchPeriod48kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView y_energy, + CandidatePitchPeriods pitch_candidates, + AvailableCpuFeatures cpu_features) { + // Compute the auto-correlation terms only for neighbors of the two pitch + // candidates (best and second best). 
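+  // This bounds the work to at most `kMaxPitchPeriods24kHz` inverted lags
+  // instead of all `kInitialNumLags24kHz`, keeping the refinement step cheap.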
+ std::array auto_correlation; + InvertedLagsIndex inverted_lags_index; + // Create two inverted lag ranges so that `r1` precedes `r2`. + const bool swap_candidates = + pitch_candidates.best > pitch_candidates.second_best; + const Range r1 = CreateInvertedLagRange( + swap_candidates ? pitch_candidates.second_best : pitch_candidates.best); + const Range r2 = CreateInvertedLagRange( + swap_candidates ? pitch_candidates.best : pitch_candidates.second_best); + // Check valid ranges. + RTC_DCHECK_LE(r1.min, r1.max); + RTC_DCHECK_LE(r2.min, r2.max); + // Check `r1` precedes `r2`. + RTC_DCHECK_LE(r1.min, r2.min); + RTC_DCHECK_LE(r1.max, r2.max); + VectorMath vector_math(cpu_features); + if (r1.max + 1 >= r2.min) { + // Overlapping or adjacent ranges. + ComputeAutoCorrelation({r1.min, r2.max}, pitch_buffer, auto_correlation, + inverted_lags_index, vector_math); + } else { + // Disjoint ranges. + ComputeAutoCorrelation(r1, pitch_buffer, auto_correlation, + inverted_lags_index, vector_math); + ComputeAutoCorrelation(r2, pitch_buffer, auto_correlation, + inverted_lags_index, vector_math); + } + return ComputePitchPeriod48kHz(pitch_buffer, inverted_lags_index, + auto_correlation, y_energy, vector_math); +} + +PitchInfo ComputeExtendedPitchPeriod48kHz( + rtc::ArrayView pitch_buffer, + rtc::ArrayView y_energy, + int initial_pitch_period_48kHz, + PitchInfo last_pitch_48kHz, + AvailableCpuFeatures cpu_features) { + RTC_DCHECK_LE(kMinPitch48kHz, initial_pitch_period_48kHz); + RTC_DCHECK_LE(initial_pitch_period_48kHz, kMaxPitch48kHz); + + // Stores information for a refined pitch candidate. + struct RefinedPitchCandidate { + int period; + float strength; + // Additional strength data used for the final pitch estimation. + float xy; // Auto-correlation. + float y_energy; // Energy of the sliding frame `y`. + }; + + const float x_energy = y_energy[kMaxPitch24kHz]; + const auto pitch_strength = [x_energy](float xy, float y_energy) { + RTC_DCHECK_GE(x_energy * y_energy, 0.f); + return xy / std::sqrt(1.f + x_energy * y_energy); + }; + VectorMath vector_math(cpu_features); + + // Initialize the best pitch candidate with `initial_pitch_period_48kHz`. + RefinedPitchCandidate best_pitch; + best_pitch.period = + std::min(initial_pitch_period_48kHz / 2, kMaxPitch24kHz - 1); + best_pitch.xy = ComputeAutoCorrelation(kMaxPitch24kHz - best_pitch.period, + pitch_buffer, vector_math); + best_pitch.y_energy = y_energy[kMaxPitch24kHz - best_pitch.period]; + best_pitch.strength = pitch_strength(best_pitch.xy, best_pitch.y_energy); + // Keep a copy of the initial pitch candidate. + const PitchInfo initial_pitch{best_pitch.period, best_pitch.strength}; + // 24 kHz version of the last estimated pitch. + const PitchInfo last_pitch{last_pitch_48kHz.period / 2, + last_pitch_48kHz.strength}; + + // Find `max_period_divisor` such that the result of + // `GetAlternativePitchPeriod(initial_pitch_period, 1, max_period_divisor)` + // equals `kMinPitch24kHz`. + const int max_period_divisor = + (2 * initial_pitch.period) / (2 * kMinPitch24kHz - 1); + for (int period_divisor = 2; period_divisor <= max_period_divisor; + ++period_divisor) { + PitchInfo alternative_pitch; + alternative_pitch.period = GetAlternativePitchPeriod( + initial_pitch.period, /*multiplier=*/1, period_divisor); + RTC_DCHECK_GE(alternative_pitch.period, kMinPitch24kHz); + // When looking at `alternative_pitch.period`, we also look at one of its + // sub-harmonics. `kSubHarmonicMultipliers` is used to know where to look. 
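+  // For instance, for `period_divisor` == 4 the multiplier is 3, so the
+  // candidate T/4 is validated together with the peak expected at 3*T/4.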
+ // `period_divisor` == 2 is a special case since `dual_alternative_period` + // might be greater than the maximum pitch period. + int dual_alternative_period = GetAlternativePitchPeriod( + initial_pitch.period, kSubHarmonicMultipliers[period_divisor - 2], + period_divisor); + RTC_DCHECK_GT(dual_alternative_period, 0); + if (period_divisor == 2 && dual_alternative_period > kMaxPitch24kHz) { + dual_alternative_period = initial_pitch.period; + } + RTC_DCHECK_NE(alternative_pitch.period, dual_alternative_period) + << "The lower pitch period and the additional sub-harmonic must not " + "coincide."; + // Compute an auto-correlation score for the primary pitch candidate + // `alternative_pitch.period` by also looking at its possible sub-harmonic + // `dual_alternative_period`. + const float xy_primary_period = ComputeAutoCorrelation( + kMaxPitch24kHz - alternative_pitch.period, pitch_buffer, vector_math); + // TODO(webrtc:10480): Copy `xy_primary_period` if the secondary period is + // equal to the primary one. + const float xy_secondary_period = ComputeAutoCorrelation( + kMaxPitch24kHz - dual_alternative_period, pitch_buffer, vector_math); + const float xy = 0.5f * (xy_primary_period + xy_secondary_period); + const float yy = + 0.5f * (y_energy[kMaxPitch24kHz - alternative_pitch.period] + + y_energy[kMaxPitch24kHz - dual_alternative_period]); + alternative_pitch.strength = pitch_strength(xy, yy); + + // Maybe update best period. + if (IsAlternativePitchStrongerThanInitial( + last_pitch, initial_pitch, alternative_pitch, period_divisor)) { + best_pitch = {alternative_pitch.period, alternative_pitch.strength, xy, + yy}; + } + } + + // Final pitch strength and period. + best_pitch.xy = std::max(0.f, best_pitch.xy); + RTC_DCHECK_LE(0.f, best_pitch.y_energy); + float final_pitch_strength = + (best_pitch.y_energy <= best_pitch.xy) + ? 1.f + : best_pitch.xy / (best_pitch.y_energy + 1.f); + final_pitch_strength = std::min(best_pitch.strength, final_pitch_strength); + int final_pitch_period_48kHz = std::max( + kMinPitch48kHz, PitchPseudoInterpolationLagPitchBuf( + best_pitch.period, pitch_buffer, vector_math)); + + return {final_pitch_period_48kHz, final_pitch_strength}; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h new file mode 100644 index 0000000000..aa2dd13745 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ + +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" + +namespace webrtc { +namespace rnn_vad { + +// Performs 2x decimation without any anti-aliasing filter. 
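+// Aliasing is tolerated here: the decimated 12 kHz buffer is only used for
+// the coarse candidate search in `ComputePitchPeriod12kHz()`, while the
+// refinement stages operate on the full 24 kHz pitch buffer.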
+void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src,
+                rtc::ArrayView<float, kBufSize12kHz> dst);
+
+// Key concepts and keywords used below in this file.
+//
+// The pitch estimation relies on a pitch buffer, which is an array-like data
+// structure designed as follows:
+//
+// |....A....|.....B.....|
+//
+// The part on the left, named `A`, contains the oldest samples, whereas `B`
+// contains the most recent ones. The size of `A` corresponds to the maximum
+// pitch period, that of `B` to the analysis frame size (e.g., 16 ms and 20 ms
+// respectively).
+//
+// Pitch estimation is essentially based on the analysis of two 20 ms frames
+// extracted from the pitch buffer. One frame, called `x`, is kept fixed and
+// corresponds to `B` - i.e., the most recent 20 ms. The other frame, called
+// `y`, is extracted from different parts of the buffer instead.
+//
+// The offset between `x` and `y` corresponds to a specific pitch period.
+// For instance, if `y` is positioned at the beginning of the pitch buffer,
+// then the cross-correlation between `x` and `y` can be used as an indication
+// of the strength for the maximum pitch.
+//
+// Such an offset can be encoded in two ways:
+// - As a lag, which is the index in the pitch buffer for the first item in `y`
+// - As an inverted lag, which is the number of samples between the beginning
+//   of `x` and the end of `y`
+//
+// |---->| lag
+// |....A....|.....B.....|
+// |<--| inverted lag
+// |.....y.....| `y` 20 ms frame
+//
+// The inverted lag has the advantage of being directly proportional to the
+// corresponding pitch period.
+
+// Computes the sum of squared samples for every sliding frame `y` in the pitch
+// buffer. The indexes of `y_energy` are inverted lags.
+void ComputeSlidingFrameSquareEnergies24kHz(
+    rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
+    rtc::ArrayView<float, kRefineNumLags24kHz> y_energy,
+    AvailableCpuFeatures cpu_features);
+
+// Top-2 pitch period candidates. Unit: number of samples - i.e., inverted
+// lags.
+struct CandidatePitchPeriods {
+  int best;
+  int second_best;
+};
+
+// Computes the candidate pitch periods at 12 kHz given a view on the 12 kHz
+// pitch buffer and the auto-correlation values (having inverted lags as
+// indexes).
+CandidatePitchPeriods ComputePitchPeriod12kHz(
+    rtc::ArrayView<const float, kBufSize12kHz> pitch_buffer,
+    rtc::ArrayView<const float, kNumLags12kHz> auto_correlation,
+    AvailableCpuFeatures cpu_features);
+
+// Computes the pitch period at 48 kHz given a view on the 24 kHz pitch buffer,
+// the energies for the sliding frames `y` at 24 kHz and the pitch period
+// candidates at 24 kHz (encoded as inverted lag).
+int ComputePitchPeriod48kHz(
+    rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
+    rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy,
+    CandidatePitchPeriods pitch_candidates_24kHz,
+    AvailableCpuFeatures cpu_features);
+
+struct PitchInfo {
+  int period;
+  float strength;
+};
+
+// Computes the pitch period at 48 kHz searching in an extended pitch range
+// given a view on the 24 kHz pitch buffer, the energies for the sliding frames
+// `y` at 24 kHz, the initial 48 kHz estimation (computed by
+// `ComputePitchPeriod48kHz()`) and the last estimated pitch.
+PitchInfo ComputeExtendedPitchPeriod48kHz(
+    rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
+    rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy,
+    int initial_pitch_period_48kHz,
+    PitchInfo last_pitch_48kHz,
+    AvailableCpuFeatures cpu_features);
+
+} // namespace rnn_vad
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc
new file mode 100644
index 0000000000..2a6e68f157
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h"
+
+#include <array>
+#include <string>
+#include <vector>
+
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "rtc_base/strings/string_builder.h"
+// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+// #include "test/fpe_observer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+constexpr int kTestPitchPeriodsLow = 3 * kMinPitch48kHz / 2;
+constexpr int kTestPitchPeriodsHigh = (3 * kMinPitch48kHz + kMaxPitch48kHz) / 2;
+
+constexpr float kTestPitchStrengthLow = 0.35f;
+constexpr float kTestPitchStrengthHigh = 0.75f;
+
+template <class T>
+std::string PrintTestIndexAndCpuFeatures(
+    const ::testing::TestParamInfo<T>& info) {
+  rtc::StringBuilder builder;
+  builder << info.index << "_" << info.param.cpu_features.ToString();
+  return builder.str();
+}
+
+// Finds the relevant CPU features combinations to test.
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
+  std::vector<AvailableCpuFeatures> v;
+  v.push_back(NoAvailableCpuFeatures());
+  AvailableCpuFeatures available = GetAvailableCpuFeatures();
+  if (available.avx2) {
+    v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false});
+  }
+  if (available.sse2) {
+    v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
+  }
+  return v;
+}
+
+// Checks that the frame-wise sliding square energy function produces output
+// within tolerance given test input data.
+TEST(RnnVadTest, ComputeSlidingFrameSquareEnergies24kHzWithinTolerance) {
+  const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures();
+
+  PitchTestData test_data;
+  std::array<float, kRefineNumLags24kHz> computed_output;
+  // TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+  // FloatingPointExceptionObserver fpe_observer;
+  ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(),
+                                         computed_output, cpu_features);
+  auto square_energies_view = test_data.SquareEnergies24kHzView();
+  ExpectNearAbsolute({square_energies_view.data(), square_energies_view.size()},
+                     computed_output, 1e-3f);
+}
+
+// Checks that the estimated pitch period is bit-exact given test input data.
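+// The expected values below are inverted lags into the decimated 12 kHz
+// pitch buffer (see `CandidatePitchPeriods`).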
+TEST(RnnVadTest, ComputePitchPeriod12kHzBitExactness) { + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + + PitchTestData test_data; + std::array pitch_buf_decimated; + Decimate2x(test_data.PitchBuffer24kHzView(), pitch_buf_decimated); + CandidatePitchPeriods pitch_candidates; + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + pitch_candidates = ComputePitchPeriod12kHz( + pitch_buf_decimated, test_data.AutoCorrelation12kHzView(), cpu_features); + EXPECT_EQ(pitch_candidates.best, 140); + EXPECT_EQ(pitch_candidates.second_best, 142); +} + +// Checks that the refined pitch period is bit-exact given test input data. +TEST(RnnVadTest, ComputePitchPeriod48kHzBitExactness) { + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + + PitchTestData test_data; + std::vector y_energy(kRefineNumLags24kHz); + rtc::ArrayView y_energy_view(y_energy.data(), + kRefineNumLags24kHz); + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + y_energy_view, cpu_features); + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + EXPECT_EQ( + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + /*pitch_candidates=*/{280, 284}, cpu_features), + 560); + EXPECT_EQ( + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + /*pitch_candidates=*/{260, 284}, cpu_features), + 568); +} + +struct PitchCandidatesParameters { + CandidatePitchPeriods pitch_candidates; + AvailableCpuFeatures cpu_features; +}; + +class PitchCandidatesParametrization + : public ::testing::TestWithParam {}; + +// Checks that the result of `ComputePitchPeriod48kHz()` does not depend on the +// order of the input pitch candidates. +TEST_P(PitchCandidatesParametrization, + ComputePitchPeriod48kHzOrderDoesNotMatter) { + const PitchCandidatesParameters params = GetParam(); + const CandidatePitchPeriods swapped_pitch_candidates{ + params.pitch_candidates.second_best, params.pitch_candidates.best}; + + PitchTestData test_data; + std::vector y_energy(kRefineNumLags24kHz); + rtc::ArrayView y_energy_view(y_energy.data(), + kRefineNumLags24kHz); + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + y_energy_view, params.cpu_features); + EXPECT_EQ( + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + params.pitch_candidates, params.cpu_features), + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + swapped_pitch_candidates, params.cpu_features)); +} + +std::vector CreatePitchCandidatesParameters() { + std::vector v; + for (AvailableCpuFeatures cpu_features : GetCpuFeaturesToTest()) { + v.push_back({{0, 2}, cpu_features}); + v.push_back({{260, 284}, cpu_features}); + v.push_back({{280, 284}, cpu_features}); + v.push_back( + {{kInitialNumLags24kHz - 2, kInitialNumLags24kHz - 1}, cpu_features}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + PitchCandidatesParametrization, + ::testing::ValuesIn(CreatePitchCandidatesParameters()), + PrintTestIndexAndCpuFeatures); + +struct ExtendedPitchPeriodSearchParameters { + int initial_pitch_period; + PitchInfo last_pitch; + PitchInfo expected_pitch; + AvailableCpuFeatures cpu_features; +}; + +class ExtendedPitchPeriodSearchParametrizaion + : public ::testing::TestWithParam {}; + +// Checks that the computed pitch period is bit-exact and that the computed +// pitch strength is within tolerance given test input data. 
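+// Periods are integers and must match exactly; strengths are floats and are
+// checked with an absolute tolerance of 1e-6.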
+TEST_P(ExtendedPitchPeriodSearchParametrizaion, + PeriodBitExactnessGainWithinTolerance) { + const ExtendedPitchPeriodSearchParameters params = GetParam(); + + PitchTestData test_data; + std::vector y_energy(kRefineNumLags24kHz); + rtc::ArrayView y_energy_view(y_energy.data(), + kRefineNumLags24kHz); + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + y_energy_view, params.cpu_features); + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + const auto computed_output = ComputeExtendedPitchPeriod48kHz( + test_data.PitchBuffer24kHzView(), y_energy_view, + params.initial_pitch_period, params.last_pitch, params.cpu_features); + EXPECT_EQ(params.expected_pitch.period, computed_output.period); + EXPECT_NEAR(params.expected_pitch.strength, computed_output.strength, 1e-6f); +} + +std::vector +CreateExtendedPitchPeriodSearchParameters() { + std::vector v; + for (AvailableCpuFeatures cpu_features : GetCpuFeaturesToTest()) { + for (int last_pitch_period : + {kTestPitchPeriodsLow, kTestPitchPeriodsHigh}) { + for (float last_pitch_strength : + {kTestPitchStrengthLow, kTestPitchStrengthHigh}) { + v.push_back({kTestPitchPeriodsLow, + {last_pitch_period, last_pitch_strength}, + {91, -0.0188608f}, + cpu_features}); + v.push_back({kTestPitchPeriodsHigh, + {last_pitch_period, last_pitch_strength}, + {475, -0.0904344f}, + cpu_features}); + } + } + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + ExtendedPitchPeriodSearchParametrizaion, + ::testing::ValuesIn(CreateExtendedPitchPeriodSearchParameters()), + PrintTestIndexAndCpuFeatures); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc new file mode 100644 index 0000000000..79b44b995c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" + +#include +#include + +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { + +// Checks that the computed pitch period is bit-exact and that the computed +// pitch gain is within tolerance given test input data. +TEST(RnnVadTest, PitchSearchWithinTolerance) { + ChunksFileReader reader = CreateLpResidualAndPitchInfoReader(); + const int num_frames = std::min(reader.num_chunks, 300); // Max 3 s. + std::vector lp_residual(kBufSize24kHz); + float expected_pitch_period, expected_pitch_strength; + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + PitchEstimator pitch_estimator(cpu_features); + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. 
+    // FloatingPointExceptionObserver fpe_observer;
+    for (int i = 0; i < num_frames; ++i) {
+      SCOPED_TRACE(i);
+      ASSERT_TRUE(reader.reader->ReadChunk(lp_residual));
+      ASSERT_TRUE(reader.reader->ReadValue(expected_pitch_period));
+      ASSERT_TRUE(reader.reader->ReadValue(expected_pitch_strength));
+      int pitch_period =
+          pitch_estimator.Estimate({lp_residual.data(), kBufSize24kHz});
+      EXPECT_EQ(expected_pitch_period, pitch_period);
+      EXPECT_NEAR(expected_pitch_strength,
+                  pitch_estimator.GetLastPitchStrengthForTesting(), 15e-6f);
+    }
+  }
+}
+
+} // namespace rnn_vad
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h
new file mode 100644
index 0000000000..a6f7fdd1a6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RING_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RING_BUFFER_H_
+
+#include <array>
+#include <cstring>
+#include <type_traits>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Ring buffer for N arrays of type T each one with size S.
+template <typename T, int S, int N>
+class RingBuffer {
+  static_assert(S > 0, "");
+  static_assert(N > 0, "");
+  static_assert(std::is_arithmetic<T>::value,
+                "Integral or floating point required.");
+
+ public:
+  RingBuffer() : tail_(0) {}
+  RingBuffer(const RingBuffer&) = delete;
+  RingBuffer& operator=(const RingBuffer&) = delete;
+  ~RingBuffer() = default;
+  // Set the ring buffer values to zero.
+  void Reset() { buffer_.fill(0); }
+  // Replace the least recently pushed array in the buffer with `new_values`.
+  void Push(rtc::ArrayView<const T, S> new_values) {
+    std::memcpy(buffer_.data() + S * tail_, new_values.data(), S * sizeof(T));
+    tail_ += 1;
+    if (tail_ == N)
+      tail_ = 0;
+  }
+  // Return an array view onto the array with a given delay. A view on the last
+  // and least recently pushed array is returned when `delay` is 0 and N - 1
+  // respectively.
+  rtc::ArrayView<const T, S> GetArrayView(int delay) const {
+    RTC_DCHECK_LE(0, delay);
+    RTC_DCHECK_LT(delay, N);
+    int offset = tail_ - 1 - delay;
+    if (offset < 0)
+      offset += N;
+    return {buffer_.data() + S * offset, S};
+  }
+
+ private:
+  int tail_;  // Index of the least recently pushed sub-array.
+  std::array<T, N * S> buffer_{};
+};
+
+} // namespace rnn_vad
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RING_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc
new file mode 100644
index 0000000000..d11d4eac3e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h" + +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Compare the elements of two given array views. +template +void ExpectEq(rtc::ArrayView a, rtc::ArrayView b) { + for (int i = 0; i < S; ++i) { + SCOPED_TRACE(i); + EXPECT_EQ(a[i], b[i]); + } +} + +// Test push/read sequences. +template +void TestRingBuffer() { + SCOPED_TRACE(N); + SCOPED_TRACE(S); + std::array prev_pushed_array; + std::array pushed_array; + rtc::ArrayView pushed_array_view(pushed_array.data(), S); + + // Init. + RingBuffer ring_buf; + ring_buf.GetArrayView(0); + pushed_array.fill(0); + ring_buf.Push(pushed_array_view); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(0)); + + // Push N times and check most recent and second most recent. + for (T v = 1; v <= static_cast(N); ++v) { + SCOPED_TRACE(v); + prev_pushed_array = pushed_array; + pushed_array.fill(v); + ring_buf.Push(pushed_array_view); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(0)); + if (N > 1) { + pushed_array.fill(v - 1); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(1)); + } + } + + // Check buffer. + for (int delay = 2; delay < N; ++delay) { + SCOPED_TRACE(delay); + T expected_value = N - static_cast(delay); + pushed_array.fill(expected_value); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(delay)); + } +} + +// Check that for different delays, different views are returned. +TEST(RnnVadTest, RingBufferArrayViews) { + constexpr int s = 3; + constexpr int n = 4; + RingBuffer ring_buf; + std::array pushed_array; + pushed_array.fill(1); + for (int k = 0; k <= n; ++k) { // Push data n + 1 times. + SCOPED_TRACE(k); + // Check array views. + for (int i = 0; i < n; ++i) { + SCOPED_TRACE(i); + auto view_i = ring_buf.GetArrayView(i); + for (int j = i + 1; j < n; ++j) { + SCOPED_TRACE(j); + auto view_j = ring_buf.GetArrayView(j); + EXPECT_NE(view_i, view_j); + } + } + ring_buf.Push(pushed_array); + } +} + +TEST(RnnVadTest, RingBufferUnsigned) { + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); +} + +TEST(RnnVadTest, RingBufferSigned) { + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); +} + +TEST(RnnVadTest, RingBufferFloating) { + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); + TestRingBuffer(); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc new file mode 100644 index 0000000000..475bef9775 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" + +#include "rtc_base/checks.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +using ::rnnoise::kInputLayerInputSize; +static_assert(kFeatureVectorSize == kInputLayerInputSize, ""); +using ::rnnoise::kInputDenseBias; +using ::rnnoise::kInputDenseWeights; +using ::rnnoise::kInputLayerOutputSize; +static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); + +using ::rnnoise::kHiddenGruBias; +using ::rnnoise::kHiddenGruRecurrentWeights; +using ::rnnoise::kHiddenGruWeights; +using ::rnnoise::kHiddenLayerOutputSize; +static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, ""); + +using ::rnnoise::kOutputDenseBias; +using ::rnnoise::kOutputDenseWeights; +using ::rnnoise::kOutputLayerOutputSize; +static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); + +} // namespace + +RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features) + : input_(kInputLayerInputSize, + kInputLayerOutputSize, + kInputDenseBias, + kInputDenseWeights, + ActivationFunction::kTansigApproximated, + cpu_features, + /*layer_name=*/"FC1"), + hidden_(kInputLayerOutputSize, + kHiddenLayerOutputSize, + kHiddenGruBias, + kHiddenGruWeights, + kHiddenGruRecurrentWeights, + cpu_features, + /*layer_name=*/"GRU1"), + output_(kHiddenLayerOutputSize, + kOutputLayerOutputSize, + kOutputDenseBias, + kOutputDenseWeights, + ActivationFunction::kSigmoidApproximated, + // The output layer is just 24x1. The unoptimized code is faster. + NoAvailableCpuFeatures(), + /*layer_name=*/"FC2") { + // Input-output chaining size checks. + RTC_DCHECK_EQ(input_.size(), hidden_.input_size()) + << "The input and the hidden layers sizes do not match."; + RTC_DCHECK_EQ(hidden_.size(), output_.input_size()) + << "The hidden and the output layers sizes do not match."; +} + +RnnVad::~RnnVad() = default; + +void RnnVad::Reset() { + hidden_.Reset(); +} + +float RnnVad::ComputeVadProbability( + rtc::ArrayView feature_vector, + bool is_silence) { + if (is_silence) { + Reset(); + return 0.f; + } + input_.ComputeOutput(feature_vector); + hidden_.ComputeOutput(input_); + output_.ComputeOutput(hidden_); + RTC_DCHECK_EQ(output_.size(), 1); + return output_.data()[0]; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h new file mode 100644 index 0000000000..3148f1b3ff --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ + +#include +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h" + +namespace webrtc { +namespace rnn_vad { + +// Recurrent network with hard-coded architecture and weights for voice activity +// detection. +class RnnVad { + public: + explicit RnnVad(const AvailableCpuFeatures& cpu_features); + RnnVad(const RnnVad&) = delete; + RnnVad& operator=(const RnnVad&) = delete; + ~RnnVad(); + void Reset(); + // Observes `feature_vector` and `is_silence`, updates the RNN and returns the + // current voice probability. + float ComputeVadProbability( + rtc::ArrayView feature_vector, + bool is_silence); + + private: + FullyConnectedLayer input_; + GatedRecurrentLayer hidden_; + FullyConnectedLayer output_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc new file mode 100644 index 0000000000..91501fb6e3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +std::vector GetScaledParams(rtc::ArrayView params) { + std::vector scaled_params(params.size()); + std::transform(params.begin(), params.end(), scaled_params.begin(), + [](int8_t x) -> float { + return ::rnnoise::kWeightsScale * static_cast(x); + }); + return scaled_params; +} + +// TODO(bugs.chromium.org/10480): Hard-code optimized layout and remove this +// function to improve setup time. +// Casts and scales `weights` and re-arranges the layout. +std::vector PreprocessWeights(rtc::ArrayView weights, + int output_size) { + if (output_size == 1) { + return GetScaledParams(weights); + } + // Transpose, scale and cast. 
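+  // In the source layout consecutive elements belong to different output
+  // units (index `i * output_size + o`); after this loop each output unit's
+  // weights are contiguous (index `o * input_size + i`), so `ComputeOutput()`
+  // can use a single `DotProduct()` call per output unit.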
+ const int input_size = rtc::CheckedDivExact( + rtc::dchecked_cast(weights.size()), output_size); + std::vector w(weights.size()); + for (int o = 0; o < output_size; ++o) { + for (int i = 0; i < input_size; ++i) { + w[o * input_size + i] = rnnoise::kWeightsScale * + static_cast(weights[i * output_size + o]); + } + } + return w; +} + +rtc::FunctionView GetActivationFunction( + ActivationFunction activation_function) { + switch (activation_function) { + case ActivationFunction::kTansigApproximated: + return ::rnnoise::TansigApproximated; + case ActivationFunction::kSigmoidApproximated: + return ::rnnoise::SigmoidApproximated; + } +} + +} // namespace + +FullyConnectedLayer::FullyConnectedLayer( + const int input_size, + const int output_size, + const rtc::ArrayView bias, + const rtc::ArrayView weights, + ActivationFunction activation_function, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name) + : input_size_(input_size), + output_size_(output_size), + bias_(GetScaledParams(bias)), + weights_(PreprocessWeights(weights, output_size)), + vector_math_(cpu_features), + activation_function_(GetActivationFunction(activation_function)) { + RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits) + << "Insufficient FC layer over-allocation (" << layer_name << ")."; + RTC_DCHECK_EQ(output_size_, bias_.size()) + << "Mismatching output size and bias terms array size (" << layer_name + << ")."; + RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size()) + << "Mismatching input-output size and weight coefficients array size (" + << layer_name << ")."; +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::ComputeOutput(rtc::ArrayView input) { + RTC_DCHECK_EQ(input.size(), input_size_); + rtc::ArrayView weights(weights_); + for (int o = 0; o < output_size_; ++o) { + output_[o] = activation_function_( + bias_[o] + vector_math_.DotProduct( + input, weights.subview(o * input_size_, input_size_))); + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h new file mode 100644 index 0000000000..d23957a6f2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "api/function_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" + +namespace webrtc { +namespace rnn_vad { + +// Activation function for a neural network cell. +enum class ActivationFunction { kTansigApproximated, kSigmoidApproximated }; + +// Maximum number of units for an FC layer. +constexpr int kFullyConnectedLayerMaxUnits = 24; + +// Fully-connected layer with a custom activation function which owns the output +// buffer. +class FullyConnectedLayer { + public: + // Ctor. 
`output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`.
+  FullyConnectedLayer(int input_size,
+                      int output_size,
+                      rtc::ArrayView<const int8_t> bias,
+                      rtc::ArrayView<const int8_t> weights,
+                      ActivationFunction activation_function,
+                      const AvailableCpuFeatures& cpu_features,
+                      absl::string_view layer_name);
+  FullyConnectedLayer(const FullyConnectedLayer&) = delete;
+  FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete;
+  ~FullyConnectedLayer();
+
+  // Returns the size of the input vector.
+  int input_size() const { return input_size_; }
+  // Returns the pointer to the first element of the output buffer.
+  const float* data() const { return output_.data(); }
+  // Returns the size of the output buffer.
+  int size() const { return output_size_; }
+
+  // Computes the fully-connected layer output.
+  void ComputeOutput(rtc::ArrayView<const float> input);
+
+ private:
+  const int input_size_;
+  const int output_size_;
+  const std::vector<float> bias_;
+  const std::vector<float> weights_;
+  const VectorMath vector_math_;
+  rtc::FunctionView<float(float)> activation_function_;
+  // Over-allocated array with size equal to `output_size_`.
+  std::array<float, kFullyConnectedLayerMaxUnits> output_;
+};
+
+} // namespace rnn_vad
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc
new file mode 100644
index 0000000000..ff9bb18bc2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h"
+
+#include <array>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "modules/audio_processing/test/performance_timer.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/system/arch.h"
+#include "test/gtest.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+using ::rnnoise::kInputDenseBias;
+using ::rnnoise::kInputDenseWeights;
+using ::rnnoise::kInputLayerInputSize;
+using ::rnnoise::kInputLayerOutputSize;
+
+// Fully connected layer test data.
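+// One 42-dimensional input feature vector and the 24 expected output
+// activations (tansig outputs, hence all within [-1, 1]).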
+constexpr std::array kFullyConnectedInputVector = { + -1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f, + -0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f, + -0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f, + -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f, + 1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f, + -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f, + 0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f}; +constexpr std::array kFullyConnectedExpectedOutput = { + -0.623293f, -0.988299f, 0.999378f, 0.967168f, 0.103087f, -0.978545f, + -0.856347f, 0.346675f, 1.f, -0.717442f, -0.544176f, 0.960363f, + 0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f, + 0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f}; + +class RnnFcParametrization + : public ::testing::TestWithParam {}; + +// Checks that the output of a fully connected layer is within tolerance given +// test input data. +TEST_P(RnnFcParametrization, CheckFullyConnectedLayerOutput) { + FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize, + kInputDenseBias, kInputDenseWeights, + ActivationFunction::kTansigApproximated, + /*cpu_features=*/GetParam(), + /*layer_name=*/"FC"); + fc.ComputeOutput(kFullyConnectedInputVector); + ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f); +} + +TEST_P(RnnFcParametrization, DISABLED_BenchmarkFullyConnectedLayer) { + const AvailableCpuFeatures cpu_features = GetParam(); + FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize, + kInputDenseBias, kInputDenseWeights, + ActivationFunction::kTansigApproximated, cpu_features, + /*layer_name=*/"FC"); + + constexpr int kNumTests = 10000; + ::webrtc::test::PerformanceTimer perf_timer(kNumTests); + for (int k = 0; k < kNumTests; ++k) { + perf_timer.StartTimer(); + fc.ComputeOutput(kFullyConnectedInputVector); + perf_timer.StopTimer(); + } + RTC_LOG(LS_INFO) << "CPU features: " << cpu_features.ToString() << " | " + << (perf_timer.GetDurationAverage() / 1000) << " +/- " + << (perf_timer.GetDurationStandardDeviation() / 1000) + << " ms"; +} + +// Finds the relevant CPU features combinations to test. +std::vector GetCpuFeaturesToTest() { + std::vector v; + v.push_back(NoAvailableCpuFeatures()); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + } + if (available.avx2) { + v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false}); + } + if (available.neon) { + v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + RnnFcParametrization, + ::testing::ValuesIn(GetCpuFeaturesToTest()), + [](const ::testing::TestParamInfo& info) { + return info.param.ToString(); + }); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc new file mode 100644 index 0000000000..ef37410caa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h" + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kNumGruGates = 3; // Update, reset, output. + +std::vector PreprocessGruTensor(rtc::ArrayView tensor_src, + int output_size) { + // Transpose, cast and scale. + // `n` is the size of the first dimension of the 3-dim tensor `weights`. + const int n = rtc::CheckedDivExact(rtc::dchecked_cast(tensor_src.size()), + output_size * kNumGruGates); + const int stride_src = kNumGruGates * output_size; + const int stride_dst = n * output_size; + std::vector tensor_dst(tensor_src.size()); + for (int g = 0; g < kNumGruGates; ++g) { + for (int o = 0; o < output_size; ++o) { + for (int i = 0; i < n; ++i) { + tensor_dst[g * stride_dst + o * n + i] = + ::rnnoise::kWeightsScale * + static_cast( + tensor_src[i * stride_src + g * output_size + o]); + } + } + } + return tensor_dst; +} + +// Computes the output for the update or the reset gate. +// Operation: `g = sigmoid(W^T∙i + R^T∙s + b)` where +// - `g`: output gate vector +// - `W`: weights matrix +// - `i`: input vector +// - `R`: recurrent weights matrix +// - `s`: state gate vector +// - `b`: bias vector +void ComputeUpdateResetGate(int input_size, + int output_size, + const VectorMath& vector_math, + rtc::ArrayView input, + rtc::ArrayView state, + rtc::ArrayView bias, + rtc::ArrayView weights, + rtc::ArrayView recurrent_weights, + rtc::ArrayView gate) { + RTC_DCHECK_EQ(input.size(), input_size); + RTC_DCHECK_EQ(state.size(), output_size); + RTC_DCHECK_EQ(bias.size(), output_size); + RTC_DCHECK_EQ(weights.size(), input_size * output_size); + RTC_DCHECK_EQ(recurrent_weights.size(), output_size * output_size); + RTC_DCHECK_GE(gate.size(), output_size); // `gate` is over-allocated. + for (int o = 0; o < output_size; ++o) { + float x = bias[o]; + x += vector_math.DotProduct(input, + weights.subview(o * input_size, input_size)); + x += vector_math.DotProduct( + state, recurrent_weights.subview(o * output_size, output_size)); + gate[o] = ::rnnoise::SigmoidApproximated(x); + } +} + +// Computes the output for the state gate. +// Operation: `s' = u .* s + (1 - u) .* ReLU(W^T∙i + R^T∙(s .* r) + b)` where +// - `s'`: output state gate vector +// - `s`: previous state gate vector +// - `u`: update gate vector +// - `W`: weights matrix +// - `i`: input vector +// - `R`: recurrent weights matrix +// - `r`: reset gate vector +// - `b`: bias vector +// - `.*` element-wise product +void ComputeStateGate(int input_size, + int output_size, + const VectorMath& vector_math, + rtc::ArrayView input, + rtc::ArrayView update, + rtc::ArrayView reset, + rtc::ArrayView bias, + rtc::ArrayView weights, + rtc::ArrayView recurrent_weights, + rtc::ArrayView state) { + RTC_DCHECK_EQ(input.size(), input_size); + RTC_DCHECK_GE(update.size(), output_size); // `update` is over-allocated. + RTC_DCHECK_GE(reset.size(), output_size); // `reset` is over-allocated. 
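+  // (`update` and `reset` are sized `kGruLayerMaxUnits`; only the first
+  // `output_size` entries hold meaningful gate outputs.)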
+ RTC_DCHECK_EQ(bias.size(), output_size); + RTC_DCHECK_EQ(weights.size(), input_size * output_size); + RTC_DCHECK_EQ(recurrent_weights.size(), output_size * output_size); + RTC_DCHECK_EQ(state.size(), output_size); + std::array reset_x_state; + for (int o = 0; o < output_size; ++o) { + reset_x_state[o] = state[o] * reset[o]; + } + for (int o = 0; o < output_size; ++o) { + float x = bias[o]; + x += vector_math.DotProduct(input, + weights.subview(o * input_size, input_size)); + x += vector_math.DotProduct( + {reset_x_state.data(), static_cast(output_size)}, + recurrent_weights.subview(o * output_size, output_size)); + state[o] = update[o] * state[o] + (1.f - update[o]) * std::max(0.f, x); + } +} + +} // namespace + +GatedRecurrentLayer::GatedRecurrentLayer( + const int input_size, + const int output_size, + const rtc::ArrayView bias, + const rtc::ArrayView weights, + const rtc::ArrayView recurrent_weights, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name) + : input_size_(input_size), + output_size_(output_size), + bias_(PreprocessGruTensor(bias, output_size)), + weights_(PreprocessGruTensor(weights, output_size)), + recurrent_weights_(PreprocessGruTensor(recurrent_weights, output_size)), + vector_math_(cpu_features) { + RTC_DCHECK_LE(output_size_, kGruLayerMaxUnits) + << "Insufficient GRU layer over-allocation (" << layer_name << ")."; + RTC_DCHECK_EQ(kNumGruGates * output_size_, bias_.size()) + << "Mismatching output size and bias terms array size (" << layer_name + << ")."; + RTC_DCHECK_EQ(kNumGruGates * input_size_ * output_size_, weights_.size()) + << "Mismatching input-output size and weight coefficients array size (" + << layer_name << ")."; + RTC_DCHECK_EQ(kNumGruGates * output_size_ * output_size_, + recurrent_weights_.size()) + << "Mismatching input-output size and recurrent weight coefficients array" + " size (" + << layer_name << ")."; + Reset(); +} + +GatedRecurrentLayer::~GatedRecurrentLayer() = default; + +void GatedRecurrentLayer::Reset() { + state_.fill(0.f); +} + +void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView input) { + RTC_DCHECK_EQ(input.size(), input_size_); + + // The tensors below are organized as a sequence of flattened tensors for the + // `update`, `reset` and `state` gates. + rtc::ArrayView bias(bias_); + rtc::ArrayView weights(weights_); + rtc::ArrayView recurrent_weights(recurrent_weights_); + // Strides to access to the flattened tensors for a specific gate. + const int stride_weights = input_size_ * output_size_; + const int stride_recurrent_weights = output_size_ * output_size_; + + rtc::ArrayView state(state_.data(), output_size_); + + // Update gate. + std::array update; + ComputeUpdateResetGate( + input_size_, output_size_, vector_math_, input, state, + bias.subview(0, output_size_), weights.subview(0, stride_weights), + recurrent_weights.subview(0, stride_recurrent_weights), update); + // Reset gate. + std::array reset; + ComputeUpdateResetGate(input_size_, output_size_, vector_math_, input, state, + bias.subview(output_size_, output_size_), + weights.subview(stride_weights, stride_weights), + recurrent_weights.subview(stride_recurrent_weights, + stride_recurrent_weights), + reset); + // State gate. 
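+  // The subviews below select the third gate's flattened tensors; see
+  // `PreprocessGruTensor()` for the update/reset/state gate ordering.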
+ ComputeStateGate(input_size_, output_size_, vector_math_, input, update, + reset, bias.subview(2 * output_size_, output_size_), + weights.subview(2 * stride_weights, stride_weights), + recurrent_weights.subview(2 * stride_recurrent_weights, + stride_recurrent_weights), + state); +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h new file mode 100644 index 0000000000..3407dfcdf1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_GRU_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_GRU_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" + +namespace webrtc { +namespace rnn_vad { + +// Maximum number of units for a GRU layer. +constexpr int kGruLayerMaxUnits = 24; + +// Recurrent layer with gated recurrent units (GRUs) with sigmoid and ReLU as +// activation functions for the update/reset and output gates respectively. +class GatedRecurrentLayer { + public: + // Ctor. `output_size` cannot be greater than `kGruLayerMaxUnits`. + GatedRecurrentLayer(int input_size, + int output_size, + rtc::ArrayView bias, + rtc::ArrayView weights, + rtc::ArrayView recurrent_weights, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name); + GatedRecurrentLayer(const GatedRecurrentLayer&) = delete; + GatedRecurrentLayer& operator=(const GatedRecurrentLayer&) = delete; + ~GatedRecurrentLayer(); + + // Returns the size of the input vector. + int input_size() const { return input_size_; } + // Returns the pointer to the first element of the output buffer. + const float* data() const { return state_.data(); } + // Returns the size of the output buffer. + int size() const { return output_size_; } + + // Resets the GRU state. + void Reset(); + // Computes the recurrent layer output and updates the status. + void ComputeOutput(rtc::ArrayView input); + + private: + const int input_size_; + const int output_size_; + const std::vector bias_; + const std::vector weights_; + const std::vector recurrent_weights_; + const VectorMath vector_math_; + // Over-allocated array with size equal to `output_size_`. + std::array state_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_GRU_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc new file mode 100644 index 0000000000..88ae72803a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h" + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "modules/audio_processing/test/performance_timer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "test/gtest.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +void TestGatedRecurrentLayer( + GatedRecurrentLayer& gru, + rtc::ArrayView input_sequence, + rtc::ArrayView expected_output_sequence) { + const int input_sequence_length = rtc::CheckedDivExact( + rtc::dchecked_cast(input_sequence.size()), gru.input_size()); + const int output_sequence_length = rtc::CheckedDivExact( + rtc::dchecked_cast(expected_output_sequence.size()), gru.size()); + ASSERT_EQ(input_sequence_length, output_sequence_length) + << "The test data length is invalid."; + // Feed the GRU layer and check the output at every step. + gru.Reset(); + for (int i = 0; i < input_sequence_length; ++i) { + SCOPED_TRACE(i); + gru.ComputeOutput( + input_sequence.subview(i * gru.input_size(), gru.input_size())); + const auto expected_output = + expected_output_sequence.subview(i * gru.size(), gru.size()); + ExpectNearAbsolute(expected_output, gru, 3e-6f); + } +} + +// Gated recurrent units layer test data. +constexpr int kGruInputSize = 5; +constexpr int kGruOutputSize = 4; +constexpr std::array kGruBias = {96, -99, -81, -114, 49, 119, + -118, 68, -76, 91, 121, 125}; +constexpr std::array kGruWeights = { + // Input 0. + 124, 9, 1, 116, // Update. + -66, -21, -118, -110, // Reset. + 104, 75, -23, -51, // Output. + // Input 1. + -72, -111, 47, 93, // Update. + 77, -98, 41, -8, // Reset. + 40, -23, -43, -107, // Output. + // Input 2. + 9, -73, 30, -32, // Update. + -2, 64, -26, 91, // Reset. + -48, -24, -28, -104, // Output. + // Input 3. + 74, -46, 116, 15, // Update. + 32, 52, -126, -38, // Reset. + -121, 12, -16, 110, // Output. + // Input 4. + -95, 66, -103, -35, // Update. + -38, 3, -126, -61, // Reset. + 28, 98, -117, -43 // Output. +}; +constexpr std::array kGruRecurrentWeights = { + // Output 0. + -3, 87, 50, 51, // Update. + -22, 27, -39, 62, // Reset. + 31, -83, -52, -48, // Output. + // Output 1. + -6, 83, -19, 104, // Update. + 105, 48, 23, 68, // Reset. + 23, 40, 7, -120, // Output. + // Output 2. + 64, -62, 117, 85, // Update. + 51, -43, 54, -105, // Reset. + 120, 56, -128, -107, // Output. + // Output 3. + 39, 50, -17, -47, // Update. + -117, 14, 108, 12, // Reset. + -7, -72, 103, -87, // Output. 
+}; +constexpr std::array kGruInputSequence = { + 0.89395463f, 0.93224651f, 0.55788344f, 0.32341808f, 0.93355054f, + 0.13475326f, 0.97370994f, 0.14253306f, 0.93710381f, 0.76093364f, + 0.65780413f, 0.41657975f, 0.49403164f, 0.46843281f, 0.75138855f, + 0.24517593f, 0.47657707f, 0.57064998f, 0.435184f, 0.19319285f}; +constexpr std::array kGruExpectedOutputSequence = { + 0.0239123f, 0.5773077f, 0.f, 0.f, + 0.01282811f, 0.64330572f, 0.f, 0.04863098f, + 0.00781069f, 0.75267816f, 0.f, 0.02579715f, + 0.00471378f, 0.59162533f, 0.11087593f, 0.01334511f}; + +class RnnGruParametrization + : public ::testing::TestWithParam {}; + +// Checks that the output of a GRU layer is within tolerance given test input +// data. +TEST_P(RnnGruParametrization, CheckGatedRecurrentLayer) { + GatedRecurrentLayer gru(kGruInputSize, kGruOutputSize, kGruBias, kGruWeights, + kGruRecurrentWeights, + /*cpu_features=*/GetParam(), + /*layer_name=*/"GRU"); + TestGatedRecurrentLayer(gru, kGruInputSequence, kGruExpectedOutputSequence); +} + +TEST_P(RnnGruParametrization, DISABLED_BenchmarkGatedRecurrentLayer) { + // Prefetch test data. + std::unique_ptr reader = CreateGruInputReader(); + std::vector gru_input_sequence(reader->size()); + reader->ReadChunk(gru_input_sequence); + + using ::rnnoise::kHiddenGruBias; + using ::rnnoise::kHiddenGruRecurrentWeights; + using ::rnnoise::kHiddenGruWeights; + using ::rnnoise::kHiddenLayerOutputSize; + using ::rnnoise::kInputLayerOutputSize; + + GatedRecurrentLayer gru(kInputLayerOutputSize, kHiddenLayerOutputSize, + kHiddenGruBias, kHiddenGruWeights, + kHiddenGruRecurrentWeights, + /*cpu_features=*/GetParam(), + /*layer_name=*/"GRU"); + + rtc::ArrayView input_sequence(gru_input_sequence); + ASSERT_EQ(input_sequence.size() % kInputLayerOutputSize, + static_cast(0)); + const int input_sequence_length = + input_sequence.size() / kInputLayerOutputSize; + + constexpr int kNumTests = 100; + ::webrtc::test::PerformanceTimer perf_timer(kNumTests); + for (int k = 0; k < kNumTests; ++k) { + perf_timer.StartTimer(); + for (int i = 0; i < input_sequence_length; ++i) { + gru.ComputeOutput( + input_sequence.subview(i * gru.input_size(), gru.input_size())); + } + perf_timer.StopTimer(); + } + RTC_LOG(LS_INFO) << (perf_timer.GetDurationAverage() / 1000) << " +/- " + << (perf_timer.GetDurationStandardDeviation() / 1000) + << " ms"; +} + +// Finds the relevant CPU features combinations to test. +std::vector GetCpuFeaturesToTest() { + std::vector v; + v.push_back(NoAvailableCpuFeatures()); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + } + if (available.avx2) { + v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false}); + } + if (available.neon) { + v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + RnnGruParametrization, + ::testing::ValuesIn(GetCpuFeaturesToTest()), + [](const ::testing::TestParamInfo& info) { + return info.param.ToString(); + }); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc new file mode 100644 index 0000000000..4c5409a14e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr std::array kFeatures = { + -1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f, + -0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f, + -0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f, + -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f, + 1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f, + -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f, + 0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f}; + +void WarmUpRnnVad(RnnVad& rnn_vad) { + for (int i = 0; i < 10; ++i) { + rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + } +} + +// Checks that the speech probability is zero with silence. +TEST(RnnVadTest, CheckZeroProbabilityWithSilence) { + RnnVad rnn_vad(GetAvailableCpuFeatures()); + WarmUpRnnVad(rnn_vad); + EXPECT_EQ(rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/true), 0.f); +} + +// Checks that the same output is produced after reset given the same input +// sequence. +TEST(RnnVadTest, CheckRnnVadReset) { + RnnVad rnn_vad(GetAvailableCpuFeatures()); + WarmUpRnnVad(rnn_vad); + float pre = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + rnn_vad.Reset(); + WarmUpRnnVad(rnn_vad); + float post = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + EXPECT_EQ(pre, post); +} + +// Checks that the same output is produced after silence is observed given the +// same input sequence. +TEST(RnnVadTest, CheckRnnVadSilence) { + RnnVad rnn_vad(GetAvailableCpuFeatures()); + WarmUpRnnVad(rnn_vad); + float pre = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/true); + WarmUpRnnVad(rnn_vad); + float post = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + EXPECT_EQ(pre, post); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build new file mode 100644 index 0000000000..3f00e43e7c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + 
"secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_auto_correlation_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build new file mode 100644 index 0000000000..64d3371d81 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build @@ -0,0 +1,216 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_common_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build new file mode 100644 index 0000000000..82e5302312 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + 
DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build new file mode 100644 index 0000000000..7dd4619965 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + 
DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_layers_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build new file mode 100644 index 0000000000..1fcbbbe408 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build @@ -0,0 +1,221 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_lp_residual_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build new file mode 100644 index 0000000000..6a69de9e29 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + 
DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_pitch_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build new file mode 100644 index 0000000000..d2a4eb7261 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_ring_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build new file mode 100644 index 0000000000..c6913470c3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_sequence_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build new file mode 100644 index 0000000000..8298c7e091 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + 
DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_spectral_features_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build new file mode 100644 index 0000000000..acb9330454 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_symmetric_matrix_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc new file mode 100644 index 0000000000..a0e1242eb4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "common_audio/resampler/push_sinc_resampler.h" +#include "common_audio/wav_file.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_compare.h" + +ABSL_FLAG(std::string, i, "", "Path to the input wav file"); +ABSL_FLAG(std::string, f, "", "Path to the output features file"); +ABSL_FLAG(std::string, o, "", "Path to the output VAD probabilities file"); + +namespace webrtc { +namespace rnn_vad { +namespace test { + +int main(int argc, char* argv[]) { + absl::ParseCommandLine(argc, argv); + rtc::LogMessage::LogToDebug(rtc::LS_INFO); + + // Open wav input file and check properties. 
+  const std::string input_wav_file = absl::GetFlag(FLAGS_i);
+  WavReader wav_reader(input_wav_file);
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files are supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() % 100 != 0) {
+    RTC_LOG(LS_ERROR) << "The sample rate must allow 10 ms frames.";
+    return 1;
+  }
+  RTC_LOG(LS_INFO) << "Input sample rate: " << wav_reader.sample_rate();
+
+  // Init output files.
+  const std::string output_vad_probs_file = absl::GetFlag(FLAGS_o);
+  FILE* vad_probs_file = fopen(output_vad_probs_file.c_str(), "wb");
+  FILE* features_file = nullptr;
+  const std::string output_feature_file = absl::GetFlag(FLAGS_f);
+  if (!output_feature_file.empty()) {
+    features_file = fopen(output_feature_file.c_str(), "wb");
+  }
+
+  // Initialize.
+  const int frame_size_10ms =
+      rtc::CheckedDivExact(wav_reader.sample_rate(), 100);
+  std::vector<float> samples_10ms;
+  samples_10ms.resize(frame_size_10ms);
+  std::array<float, kFrameSize10ms24kHz> samples_10ms_24kHz;
+  PushSincResampler resampler(frame_size_10ms, kFrameSize10ms24kHz);
+  const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures();
+  FeaturesExtractor features_extractor(cpu_features);
+  std::array<float, kFeatureVectorSize> feature_vector;
+  RnnVad rnn_vad(cpu_features);
+
+  // Compute VAD probabilities.
+  while (true) {
+    // Read frame at the input sample rate.
+    const size_t read_samples =
+        wav_reader.ReadSamples(frame_size_10ms, samples_10ms.data());
+    if (rtc::SafeLt(read_samples, frame_size_10ms)) {
+      break;  // EOF.
+    }
+    // Resample input.
+    resampler.Resample(samples_10ms.data(), samples_10ms.size(),
+                       samples_10ms_24kHz.data(), samples_10ms_24kHz.size());
+
+    // Extract features and feed the RNN.
+    bool is_silence = features_extractor.CheckSilenceComputeFeatures(
+        samples_10ms_24kHz, feature_vector);
+    float vad_probability =
+        rnn_vad.ComputeVadProbability(feature_vector, is_silence);
+    // Write voice probability.
+    RTC_DCHECK_GE(vad_probability, 0.f);
+    RTC_DCHECK_GE(1.f, vad_probability);
+    fwrite(&vad_probability, sizeof(float), 1, vad_probs_file);
+    // Write features.
+    if (features_file) {
+      const float float_is_silence = is_silence ? 1.f : 0.f;
+      fwrite(&float_is_silence, sizeof(float), 1, features_file);
+      if (is_silence) {
+        // Do not write uninitialized values.
+        feature_vector.fill(0.f);
+      }
+      fwrite(feature_vector.data(), sizeof(float), kFeatureVectorSize,
+             features_file);
+    }
+  }
+
+  // Close output file(s).
+  fclose(vad_probs_file);
+  RTC_LOG(LS_INFO) << "VAD probabilities written to " << output_vad_probs_file;
+  if (features_file) {
+    fclose(features_file);
+    RTC_LOG(LS_INFO) << "Features written to " << output_feature_file;
+  }
+
+  return 0;
+}
+
+}  // namespace test
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::rnn_vad::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc
new file mode 100644
index 0000000000..f33cd14a8a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <array>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common_audio/resampler/push_sinc_resampler.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h"
+#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "modules/audio_processing/test/performance_timer.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "test/gtest.h"
+#include "third_party/rnnoise/src/rnn_activations.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+constexpr int kFrameSize10ms48kHz = 480;
+
+void DumpPerfStats(int num_samples,
+                   int sample_rate,
+                   double average_us,
+                   double standard_deviation) {
+  float audio_track_length_ms =
+      1e3f * static_cast<float>(num_samples) / static_cast<float>(sample_rate);
+  float average_ms = static_cast<float>(average_us) / 1e3f;
+  float speed = audio_track_length_ms / average_ms;
+  RTC_LOG(LS_INFO) << "track duration (ms): " << audio_track_length_ms;
+  RTC_LOG(LS_INFO) << "average processing time (ms): " << average_ms << " +/- "
+                   << (standard_deviation / 1e3);
+  RTC_LOG(LS_INFO) << "speed: " << speed << "x";
+}
+
+// When the RNN VAD model is updated and the expected output changes, set the
+// constant below to true in order to write new expected output binary files.
+constexpr bool kWriteComputedOutputToFile = false;
+
+// Guards against forgetting to set `kWriteComputedOutputToFile` back to false
+// when the expected output files are re-exported.
+TEST(RnnVadTest, CheckWriteComputedOutputIsFalse) {
+  ASSERT_FALSE(kWriteComputedOutputToFile)
+      << "Cannot land if kWriteComputedOutputToFile is true.";
+}
+
+class RnnVadProbabilityParametrization
+    : public ::testing::TestWithParam<AvailableCpuFeatures> {};
+
+// Checks that the computed VAD probability for a test input sequence sampled
+// at 48 kHz is within tolerance.
+TEST_P(RnnVadProbabilityParametrization, RnnVadProbabilityWithinTolerance) {
+  // Init resampler, feature extractor and RNN.
+  PushSincResampler decimator(kFrameSize10ms48kHz, kFrameSize10ms24kHz);
+  const AvailableCpuFeatures cpu_features = GetParam();
+  FeaturesExtractor features_extractor(cpu_features);
+  RnnVad rnn_vad(cpu_features);
+
+  // Init input samples and expected output readers.
+  std::unique_ptr<FileReader> samples_reader = CreatePcmSamplesReader();
+  std::unique_ptr<FileReader> expected_vad_prob_reader = CreateVadProbsReader();
+
+  // Input length. The last incomplete frame is ignored.
+  const int num_frames = samples_reader->size() / kFrameSize10ms48kHz;
+
+  // Init buffers.
+  std::vector<float> samples_48k(kFrameSize10ms48kHz);
+  std::vector<float> samples_24k(kFrameSize10ms24kHz);
+  std::vector<float> feature_vector(kFeatureVectorSize);
+  std::vector<float> computed_vad_prob(num_frames);
+  std::vector<float> expected_vad_prob(num_frames);
+
+  // Read expected output.
+  ASSERT_TRUE(expected_vad_prob_reader->ReadChunk(expected_vad_prob));
+
+  // Compute VAD probabilities on the downsampled input.
+  float cumulative_error = 0.f;
+  for (int i = 0; i < num_frames; ++i) {
+    ASSERT_TRUE(samples_reader->ReadChunk(samples_48k));
+    decimator.Resample(samples_48k.data(), samples_48k.size(),
+                       samples_24k.data(), samples_24k.size());
+    bool is_silence = features_extractor.CheckSilenceComputeFeatures(
+        {samples_24k.data(), kFrameSize10ms24kHz},
+        {feature_vector.data(), kFeatureVectorSize});
+    computed_vad_prob[i] = rnn_vad.ComputeVadProbability(
+        {feature_vector.data(), kFeatureVectorSize}, is_silence);
+    EXPECT_NEAR(computed_vad_prob[i], expected_vad_prob[i], 1e-3f);
+    cumulative_error += std::abs(computed_vad_prob[i] - expected_vad_prob[i]);
+  }
+  // Check average error.
+  EXPECT_LT(cumulative_error / num_frames, 1e-4f);
+
+  if (kWriteComputedOutputToFile) {
+    FileWriter vad_prob_writer("new_vad_prob.dat");
+    vad_prob_writer.WriteChunk(computed_vad_prob);
+  }
+}
+
+// Performance test for the RNN VAD (pre-fetching and downsampling are
+// excluded). Keep disabled and only enable locally to measure performance as
+// follows:
+// - on desktop: run this unit test adding "--logs";
+// - on Android: run this unit test adding "--logcat-output-file".
+TEST_P(RnnVadProbabilityParametrization, DISABLED_RnnVadPerformance) {
+  // PCM samples reader and buffers.
+  std::unique_ptr<FileReader> samples_reader = CreatePcmSamplesReader();
+  // The last incomplete frame is ignored.
+  const int num_frames = samples_reader->size() / kFrameSize10ms48kHz;
+  std::array<float, kFrameSize10ms48kHz> samples;
+  // Pre-fetch and decimate samples.
+  PushSincResampler decimator(kFrameSize10ms48kHz, kFrameSize10ms24kHz);
+  std::vector<float> prefetched_decimated_samples;
+  prefetched_decimated_samples.resize(num_frames * kFrameSize10ms24kHz);
+  for (int i = 0; i < num_frames; ++i) {
+    ASSERT_TRUE(samples_reader->ReadChunk(samples));
+    decimator.Resample(samples.data(), samples.size(),
+                       &prefetched_decimated_samples[i * kFrameSize10ms24kHz],
+                       kFrameSize10ms24kHz);
+  }
+  // Initialize.
+  const AvailableCpuFeatures cpu_features = GetParam();
+  FeaturesExtractor features_extractor(cpu_features);
+  std::array<float, kFeatureVectorSize> feature_vector;
+  RnnVad rnn_vad(cpu_features);
+  constexpr int number_of_tests = 100;
+  ::webrtc::test::PerformanceTimer perf_timer(number_of_tests);
+  for (int k = 0; k < number_of_tests; ++k) {
+    features_extractor.Reset();
+    rnn_vad.Reset();
+    // Process frames.
+    perf_timer.StartTimer();
+    for (int i = 0; i < num_frames; ++i) {
+      bool is_silence = features_extractor.CheckSilenceComputeFeatures(
+          {&prefetched_decimated_samples[i * kFrameSize10ms24kHz],
+           kFrameSize10ms24kHz},
+          feature_vector);
+      rnn_vad.ComputeVadProbability(feature_vector, is_silence);
+    }
+    perf_timer.StopTimer();
+  }
+  DumpPerfStats(num_frames * kFrameSize10ms24kHz, kSampleRate24kHz,
+                perf_timer.GetDurationAverage(),
+                perf_timer.GetDurationStandardDeviation());
+}
+
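+// Worked example for the speed figure logged by DumpPerfStats() above, with
+// hypothetical numbers: 240000 samples at 24 kHz give a 10 s track
+// (10000 ms); an average processing time of 50 ms then yields
+// speed = 10000 / 50 = 200x, i.e. the VAD runs 200 times faster than real
+// time.
+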
+// Finds the relevant CPU features combinations to test.
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
+  std::vector<AvailableCpuFeatures> v;
+  v.push_back(NoAvailableCpuFeatures());
+  AvailableCpuFeatures available = GetAvailableCpuFeatures();
+  if (available.avx2 && available.sse2) {
+    v.push_back({/*sse2=*/true, /*avx2=*/true, /*neon=*/false});
+  }
+  if (available.sse2) {
+    v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
+  }
+  if (available.neon) {
+    v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true});
+  }
+  return v;
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    RnnVadTest,
+    RnnVadProbabilityParametrization,
+    ::testing::ValuesIn(GetCpuFeaturesToTest()),
+    [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
+      return info.param.ToString();
+    });
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h
new file mode 100644
index 0000000000..a7402788c8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SEQUENCE_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SEQUENCE_BUFFER_H_
+
+#include <algorithm>
+#include <cstring>
+#include <type_traits>
+#include <vector>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Linear buffer implementation to (i) push fixed-size chunks of sequential
+// data and (ii) view contiguous parts of the buffer. The buffer and the
+// pushed chunks have size S and N respectively. For instance, when S = 2N the
+// first half of the sequence buffer is replaced with its second half, and the
+// new N values are written at the end of the buffer.
+// The class also provides a view on the most recent M values, where
+// 0 < M <= S and by default M = N.
+template <typename T, int S, int N, int M = N>
+class SequenceBuffer {
+  static_assert(N <= S,
+                "The new chunk size cannot be larger than the sequence buffer "
+                "size.");
+  static_assert(std::is_arithmetic<T>::value,
+                "Integral or floating point required.");
+
+ public:
+  SequenceBuffer() : buffer_(S) {
+    RTC_DCHECK_EQ(S, buffer_.size());
+    Reset();
+  }
+  SequenceBuffer(const SequenceBuffer&) = delete;
+  SequenceBuffer& operator=(const SequenceBuffer&) = delete;
+  ~SequenceBuffer() = default;
+  int size() const { return S; }
+  int chunks_size() const { return N; }
+  // Sets the sequence buffer values to zero.
+  void Reset() { std::fill(buffer_.begin(), buffer_.end(), 0); }
+  // Returns a view on the whole buffer.
+  rtc::ArrayView<const T, S> GetBufferView() const {
+    return {buffer_.data(), S};
+  }
+  // Returns a view on the M most recent values of the buffer.
+  rtc::ArrayView<const T, M> GetMostRecentValuesView() const {
+    static_assert(M <= S,
+                  "The number of most recent values cannot be larger than the "
+                  "sequence buffer size.");
+    return {buffer_.data() + S - M, M};
+  }
+  // Shifts the buffer left by N items and adds the new N items at the end.
+  void Push(rtc::ArrayView<const T, N> new_values) {
+    // Make space for the new values.
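+    // (Illustrative walk-through, assuming S = 4 and N = 2: a buffer holding
+    // [a b c d] becomes [c d c d] after the memmove below, and then
+    // [c d e f] once the memcpy writes the new chunk [e f] into the last N
+    // slots.)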
+ if (S > N) + std::memmove(buffer_.data(), buffer_.data() + N, (S - N) * sizeof(T)); + // Copy the new values at the end of the buffer. + std::memcpy(buffer_.data() + S - N, new_values.data(), N * sizeof(T)); + } + + private: + std::vector buffer_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SEQUENCE_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc new file mode 100644 index 0000000000..af005833c1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/sequence_buffer.h" + +#include +#include + +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +template +void TestSequenceBufferPushOp() { + SCOPED_TRACE(S); + SCOPED_TRACE(N); + SequenceBuffer seq_buf; + auto seq_buf_view = seq_buf.GetBufferView(); + std::array chunk; + + // Check that a chunk is fully gone after ceil(S / N) push ops. + chunk.fill(1); + seq_buf.Push(chunk); + chunk.fill(0); + constexpr int required_push_ops = (S % N) ? S / N + 1 : S / N; + for (int i = 0; i < required_push_ops - 1; ++i) { + SCOPED_TRACE(i); + seq_buf.Push(chunk); + // Still in the buffer. + const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end()); + EXPECT_EQ(1, *m); + } + // Gone after another push. + seq_buf.Push(chunk); + const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end()); + EXPECT_EQ(0, *m); + + // Check that the last item moves left by N positions after a push op. + if (S > N) { + // Fill in with non-zero values. + for (int i = 0; i < N; ++i) + chunk[i] = static_cast(i + 1); + seq_buf.Push(chunk); + // With the next Push(), `last` will be moved left by N positions. + const T last = chunk[N - 1]; + for (int i = 0; i < N; ++i) + chunk[i] = static_cast(last + i + 1); + seq_buf.Push(chunk); + EXPECT_EQ(last, seq_buf_view[S - N - 1]); + } +} + +TEST(RnnVadTest, SequenceBufferGetters) { + constexpr int buffer_size = 8; + constexpr int chunk_size = 8; + SequenceBuffer seq_buf; + EXPECT_EQ(buffer_size, seq_buf.size()); + EXPECT_EQ(chunk_size, seq_buf.chunks_size()); + // Test view. + auto seq_buf_view = seq_buf.GetBufferView(); + EXPECT_EQ(0, seq_buf_view[0]); + EXPECT_EQ(0, seq_buf_view[seq_buf_view.size() - 1]); + constexpr std::array chunk = {10, 20, 30, 40, + 50, 60, 70, 80}; + seq_buf.Push(chunk); + EXPECT_EQ(10, *seq_buf_view.begin()); + EXPECT_EQ(80, *(seq_buf_view.end() - 1)); +} + +TEST(RnnVadTest, SequenceBufferPushOpsUnsigned) { + TestSequenceBufferPushOp(); // Chunk size: 25%. + TestSequenceBufferPushOp(); // Chunk size: 50%. + TestSequenceBufferPushOp(); // Chunk size: 100%. + TestSequenceBufferPushOp(); // Non-integer ratio. +} + +TEST(RnnVadTest, SequenceBufferPushOpsSigned) { + TestSequenceBufferPushOp(); // Chunk size: 25%. + TestSequenceBufferPushOp(); // Chunk size: 50%. + TestSequenceBufferPushOp(); // Chunk size: 100%. 
+ TestSequenceBufferPushOp(); // Non-integer ratio. +} + +TEST(RnnVadTest, SequenceBufferPushOpsFloating) { + TestSequenceBufferPushOp(); // Chunk size: 25%. + TestSequenceBufferPushOp(); // Chunk size: 50%. + TestSequenceBufferPushOp(); // Chunk size: 100%. + TestSequenceBufferPushOp(); // Non-integer ratio. +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc new file mode 100644 index 0000000000..96086babb6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/spectral_features.h" + +#include +#include +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr float kSilenceThreshold = 0.04f; + +// Computes the new cepstral difference stats and pushes them into the passed +// symmetric matrix buffer. +void UpdateCepstralDifferenceStats( + rtc::ArrayView new_cepstral_coeffs, + const RingBuffer& ring_buf, + SymmetricMatrixBuffer* sym_matrix_buf) { + RTC_DCHECK(sym_matrix_buf); + // Compute the new cepstral distance stats. + std::array distances; + for (int i = 0; i < kCepstralCoeffsHistorySize - 1; ++i) { + const int delay = i + 1; + auto old_cepstral_coeffs = ring_buf.GetArrayView(delay); + distances[i] = 0.f; + for (int k = 0; k < kNumBands; ++k) { + const float c = new_cepstral_coeffs[k] - old_cepstral_coeffs[k]; + distances[i] += c * c; + } + } + // Push the new spectral distance stats into the symmetric matrix buffer. + sym_matrix_buf->Push(distances); +} + +// Computes the first half of the Vorbis window. +std::array ComputeScaledHalfVorbisWindow( + float scaling = 1.f) { + constexpr int kHalfSize = kFrameSize20ms24kHz / 2; + std::array half_window{}; + for (int i = 0; i < kHalfSize; ++i) { + half_window[i] = + scaling * + std::sin(0.5 * kPi * std::sin(0.5 * kPi * (i + 0.5) / kHalfSize) * + std::sin(0.5 * kPi * (i + 0.5) / kHalfSize)); + } + return half_window; +} + +// Computes the forward FFT on a 20 ms frame to which a given window function is +// applied. The Fourier coefficient corresponding to the Nyquist frequency is +// set to zero (it is never used and this allows to simplify the code). +void ComputeWindowedForwardFft( + rtc::ArrayView frame, + const std::array& half_window, + Pffft::FloatBuffer* fft_input_buffer, + Pffft::FloatBuffer* fft_output_buffer, + Pffft* fft) { + RTC_DCHECK_EQ(frame.size(), 2 * half_window.size()); + // Apply windowing. + auto in = fft_input_buffer->GetView(); + for (int i = 0, j = kFrameSize20ms24kHz - 1; + rtc::SafeLt(i, half_window.size()); ++i, --j) { + in[i] = frame[i] * half_window[i]; + in[j] = frame[j] * half_window[i]; + } + fft->ForwardTransform(*fft_input_buffer, fft_output_buffer, /*ordered=*/true); + // Set the Nyquist frequency coefficient to zero. 
+ auto out = fft_output_buffer->GetView(); + out[1] = 0.f; +} + +} // namespace + +SpectralFeaturesExtractor::SpectralFeaturesExtractor() + : half_window_(ComputeScaledHalfVorbisWindow( + 1.f / static_cast(kFrameSize20ms24kHz))), + fft_(kFrameSize20ms24kHz, Pffft::FftType::kReal), + fft_buffer_(fft_.CreateBuffer()), + reference_frame_fft_(fft_.CreateBuffer()), + lagged_frame_fft_(fft_.CreateBuffer()), + dct_table_(ComputeDctTable()) {} + +SpectralFeaturesExtractor::~SpectralFeaturesExtractor() = default; + +void SpectralFeaturesExtractor::Reset() { + cepstral_coeffs_ring_buf_.Reset(); + cepstral_diffs_buf_.Reset(); +} + +bool SpectralFeaturesExtractor::CheckSilenceComputeFeatures( + rtc::ArrayView reference_frame, + rtc::ArrayView lagged_frame, + rtc::ArrayView higher_bands_cepstrum, + rtc::ArrayView average, + rtc::ArrayView first_derivative, + rtc::ArrayView second_derivative, + rtc::ArrayView bands_cross_corr, + float* variability) { + // Compute the Opus band energies for the reference frame. + ComputeWindowedForwardFft(reference_frame, half_window_, fft_buffer_.get(), + reference_frame_fft_.get(), &fft_); + spectral_correlator_.ComputeAutoCorrelation( + reference_frame_fft_->GetConstView(), reference_frame_bands_energy_); + // Check if the reference frame has silence. + const float tot_energy = + std::accumulate(reference_frame_bands_energy_.begin(), + reference_frame_bands_energy_.end(), 0.f); + if (tot_energy < kSilenceThreshold) { + return true; + } + // Compute the Opus band energies for the lagged frame. + ComputeWindowedForwardFft(lagged_frame, half_window_, fft_buffer_.get(), + lagged_frame_fft_.get(), &fft_); + spectral_correlator_.ComputeAutoCorrelation(lagged_frame_fft_->GetConstView(), + lagged_frame_bands_energy_); + // Log of the band energies for the reference frame. + std::array log_bands_energy; + ComputeSmoothedLogMagnitudeSpectrum(reference_frame_bands_energy_, + log_bands_energy); + // Reference frame cepstrum. + std::array cepstrum; + ComputeDct(log_bands_energy, dct_table_, cepstrum); + // Ad-hoc correction terms for the first two cepstral coefficients. + cepstrum[0] -= 12.f; + cepstrum[1] -= 4.f; + // Update the ring buffer and the cepstral difference stats. + cepstral_coeffs_ring_buf_.Push(cepstrum); + UpdateCepstralDifferenceStats(cepstrum, cepstral_coeffs_ring_buf_, + &cepstral_diffs_buf_); + // Write the higher bands cepstral coefficients. + RTC_DCHECK_EQ(cepstrum.size() - kNumLowerBands, higher_bands_cepstrum.size()); + std::copy(cepstrum.begin() + kNumLowerBands, cepstrum.end(), + higher_bands_cepstrum.begin()); + // Compute and write remaining features. + ComputeAvgAndDerivatives(average, first_derivative, second_derivative); + ComputeNormalizedCepstralCorrelation(bands_cross_corr); + RTC_DCHECK(variability); + *variability = ComputeVariability(); + return false; +} + +void SpectralFeaturesExtractor::ComputeAvgAndDerivatives( + rtc::ArrayView average, + rtc::ArrayView first_derivative, + rtc::ArrayView second_derivative) const { + auto curr = cepstral_coeffs_ring_buf_.GetArrayView(0); + auto prev1 = cepstral_coeffs_ring_buf_.GetArrayView(1); + auto prev2 = cepstral_coeffs_ring_buf_.GetArrayView(2); + RTC_DCHECK_EQ(average.size(), first_derivative.size()); + RTC_DCHECK_EQ(first_derivative.size(), second_derivative.size()); + RTC_DCHECK_LE(average.size(), curr.size()); + for (int i = 0; rtc::SafeLt(i, average.size()); ++i) { + // Average, kernel: [1, 1, 1]. + average[i] = curr[i] + prev1[i] + prev2[i]; + // First derivative, kernel: [1, 0, - 1]. 
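+    // (Numeric check with hypothetical coefficients curr = 5, prev1 = 3,
+    // prev2 = 1: the unnormalized average is 5 + 3 + 1 = 9, the first
+    // derivative is 5 - 1 = 4 and the second derivative is 5 - 2 * 3 + 1 = 0,
+    // i.e. a linearly increasing coefficient has a zero second derivative.)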
+ first_derivative[i] = curr[i] - prev2[i]; + // Second derivative, Laplacian kernel: [1, -2, 1]. + second_derivative[i] = curr[i] - 2 * prev1[i] + prev2[i]; + } +} + +void SpectralFeaturesExtractor::ComputeNormalizedCepstralCorrelation( + rtc::ArrayView bands_cross_corr) { + spectral_correlator_.ComputeCrossCorrelation( + reference_frame_fft_->GetConstView(), lagged_frame_fft_->GetConstView(), + bands_cross_corr_); + // Normalize. + for (int i = 0; rtc::SafeLt(i, bands_cross_corr_.size()); ++i) { + bands_cross_corr_[i] = + bands_cross_corr_[i] / + std::sqrt(0.001f + reference_frame_bands_energy_[i] * + lagged_frame_bands_energy_[i]); + } + // Cepstrum. + ComputeDct(bands_cross_corr_, dct_table_, bands_cross_corr); + // Ad-hoc correction terms for the first two cepstral coefficients. + bands_cross_corr[0] -= 1.3f; + bands_cross_corr[1] -= 0.9f; +} + +float SpectralFeaturesExtractor::ComputeVariability() const { + // Compute cepstral variability score. + float variability = 0.f; + for (int delay1 = 0; delay1 < kCepstralCoeffsHistorySize; ++delay1) { + float min_dist = std::numeric_limits::max(); + for (int delay2 = 0; delay2 < kCepstralCoeffsHistorySize; ++delay2) { + if (delay1 == delay2) // The distance would be 0. + continue; + min_dist = + std::min(min_dist, cepstral_diffs_buf_.GetValue(delay1, delay2)); + } + variability += min_dist; + } + // Normalize (based on training set stats). + // TODO(bugs.webrtc.org/10480): Isolate normalization from feature extraction. + return variability / kCepstralCoeffsHistorySize - 2.1f; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h new file mode 100644 index 0000000000..d327ef8e01 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ + +#include +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h" +#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h" +#include "modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h" +#include "modules/audio_processing/utility/pffft_wrapper.h" + +namespace webrtc { +namespace rnn_vad { + +// Class to compute spectral features. +class SpectralFeaturesExtractor { + public: + SpectralFeaturesExtractor(); + SpectralFeaturesExtractor(const SpectralFeaturesExtractor&) = delete; + SpectralFeaturesExtractor& operator=(const SpectralFeaturesExtractor&) = + delete; + ~SpectralFeaturesExtractor(); + // Resets the internal state of the feature extractor. + void Reset(); + // Analyzes a pair of reference and lagged frames from the pitch buffer, + // detects silence and computes features. If silence is detected, the output + // is neither computed nor written. 
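+  // A minimal caller sketch, mirroring the output layout used by the unit
+  // tests; `extractor`, `reference_frame` and `lagged_frame` are hypothetical
+  // names:
+  //
+  //   std::array<float, kNumBands + 3 * kNumLowerBands + 1> feats;
+  //   float variability;
+  //   const bool is_silence = extractor.CheckSilenceComputeFeatures(
+  //       reference_frame, lagged_frame,
+  //       /*higher_bands_cepstrum=*/
+  //       {feats.data() + kNumLowerBands, kNumBands - kNumLowerBands},
+  //       /*average=*/{feats.data(), kNumLowerBands},
+  //       /*first_derivative=*/{feats.data() + kNumBands, kNumLowerBands},
+  //       /*second_derivative=*/
+  //       {feats.data() + kNumBands + kNumLowerBands, kNumLowerBands},
+  //       /*bands_cross_corr=*/
+  //       {feats.data() + kNumBands + 2 * kNumLowerBands, kNumLowerBands},
+  //       &variability);
+  //   // When `is_silence` is true, `feats` and `variability` are untouched.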
+ bool CheckSilenceComputeFeatures( + rtc::ArrayView reference_frame, + rtc::ArrayView lagged_frame, + rtc::ArrayView higher_bands_cepstrum, + rtc::ArrayView average, + rtc::ArrayView first_derivative, + rtc::ArrayView second_derivative, + rtc::ArrayView bands_cross_corr, + float* variability); + + private: + void ComputeAvgAndDerivatives( + rtc::ArrayView average, + rtc::ArrayView first_derivative, + rtc::ArrayView second_derivative) const; + void ComputeNormalizedCepstralCorrelation( + rtc::ArrayView bands_cross_corr); + float ComputeVariability() const; + + const std::array half_window_; + Pffft fft_; + std::unique_ptr fft_buffer_; + std::unique_ptr reference_frame_fft_; + std::unique_ptr lagged_frame_fft_; + SpectralCorrelator spectral_correlator_; + std::array reference_frame_bands_energy_; + std::array lagged_frame_bands_energy_; + std::array bands_cross_corr_; + const std::array dct_table_; + RingBuffer + cepstral_coeffs_ring_buf_; + SymmetricMatrixBuffer cepstral_diffs_buf_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc new file mode 100644 index 0000000000..a10b0f7ec9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h" + +#include +#include +#include + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Weights for each FFT coefficient for each Opus band (Nyquist frequency +// excluded). The size of each band is specified in +// `kOpusScaleNumBins24kHz20ms`. 
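+// (Worked example for the band sizes encoded below: the FFT bin spacing is
+// kSampleRate24kHz / kFrameSize20ms24kHz = 24000 / 480 = 50 Hz, and the first
+// Opus boundary is at 200 Hz, i.e. bin 200 / 50 = 4; band 0 therefore covers
+// 4 bins, with triangular weights 0, 0.25, 0.5 and 0.75.)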
+constexpr std::array kOpusBandWeights24kHz20ms = + {{ + 0.f, 0.25f, 0.5f, 0.75f, // Band 0 + 0.f, 0.25f, 0.5f, 0.75f, // Band 1 + 0.f, 0.25f, 0.5f, 0.75f, // Band 2 + 0.f, 0.25f, 0.5f, 0.75f, // Band 3 + 0.f, 0.25f, 0.5f, 0.75f, // Band 4 + 0.f, 0.25f, 0.5f, 0.75f, // Band 5 + 0.f, 0.25f, 0.5f, 0.75f, // Band 6 + 0.f, 0.25f, 0.5f, 0.75f, // Band 7 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 8 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 9 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 10 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 11 + 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f, + 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f, + 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, + 0.9375f, // Band 12 + 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f, + 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f, + 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, + 0.9375f, // Band 13 + 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f, + 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f, + 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, + 0.9375f, // Band 14 + 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f, + 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f, + 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f, + 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f, + 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 15 + 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f, + 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f, + 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f, + 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f, + 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 16 + 0.f, 0.03125f, 0.0625f, 0.09375f, 0.125f, + 0.15625f, 0.1875f, 0.21875f, 0.25f, 0.28125f, + 0.3125f, 0.34375f, 0.375f, 0.40625f, 0.4375f, + 0.46875f, 0.5f, 0.53125f, 0.5625f, 0.59375f, + 0.625f, 0.65625f, 0.6875f, 0.71875f, 0.75f, + 0.78125f, 0.8125f, 0.84375f, 0.875f, 0.90625f, + 0.9375f, 0.96875f, // Band 17 + 0.f, 0.0208333f, 0.0416667f, 0.0625f, 0.0833333f, + 0.104167f, 0.125f, 0.145833f, 0.166667f, 0.1875f, + 0.208333f, 0.229167f, 0.25f, 0.270833f, 0.291667f, + 0.3125f, 0.333333f, 0.354167f, 0.375f, 0.395833f, + 0.416667f, 0.4375f, 0.458333f, 0.479167f, 0.5f, + 0.520833f, 0.541667f, 0.5625f, 0.583333f, 0.604167f, + 0.625f, 0.645833f, 0.666667f, 0.6875f, 0.708333f, + 0.729167f, 0.75f, 0.770833f, 0.791667f, 0.8125f, + 0.833333f, 0.854167f, 0.875f, 0.895833f, 0.916667f, + 0.9375f, 0.958333f, 0.979167f // Band 18 + }}; + +} // namespace + +SpectralCorrelator::SpectralCorrelator() + : weights_(kOpusBandWeights24kHz20ms.begin(), + kOpusBandWeights24kHz20ms.end()) {} + +SpectralCorrelator::~SpectralCorrelator() = default; + +void SpectralCorrelator::ComputeAutoCorrelation( + rtc::ArrayView x, + rtc::ArrayView auto_corr) const { + ComputeCrossCorrelation(x, x, auto_corr); +} + +void SpectralCorrelator::ComputeCrossCorrelation( + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView cross_corr) const { + RTC_DCHECK_EQ(x.size(), kFrameSize20ms24kHz); + RTC_DCHECK_EQ(x.size(), y.size()); + RTC_DCHECK_EQ(x[1], 0.f) << "The Nyquist coefficient must be zeroed."; + RTC_DCHECK_EQ(y[1], 0.f) << "The Nyquist coefficient must be zeroed."; + constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms(); + int k = 0; // Next Fourier coefficient index. + cross_corr[0] = 0.f; + for (int i = 0; i < kOpusBands24kHz - 1; ++i) { + cross_corr[i + 1] = 0.f; + for (int j = 0; j < kOpusScaleNumBins24kHz20ms[i]; ++j) { // Band size. 
+ const float v = x[2 * k] * y[2 * k] + x[2 * k + 1] * y[2 * k + 1]; + const float tmp = weights_[k] * v; + cross_corr[i] += v - tmp; + cross_corr[i + 1] += tmp; + k++; + } + } + cross_corr[0] *= 2.f; // The first band only gets half contribution. + RTC_DCHECK_EQ(k, kFrameSize20ms24kHz / 2); // Nyquist coefficient never used. +} + +void ComputeSmoothedLogMagnitudeSpectrum( + rtc::ArrayView bands_energy, + rtc::ArrayView log_bands_energy) { + RTC_DCHECK_LE(bands_energy.size(), kNumBands); + constexpr float kOneByHundred = 1e-2f; + constexpr float kLogOneByHundred = -2.f; + // Init. + float log_max = kLogOneByHundred; + float follow = kLogOneByHundred; + const auto smooth = [&log_max, &follow](float x) { + x = std::max(log_max - 7.f, std::max(follow - 1.5f, x)); + log_max = std::max(log_max, x); + follow = std::max(follow - 1.5f, x); + return x; + }; + // Smoothing over the bands for which the band energy is defined. + for (int i = 0; rtc::SafeLt(i, bands_energy.size()); ++i) { + log_bands_energy[i] = smooth(std::log10(kOneByHundred + bands_energy[i])); + } + // Smoothing over the remaining bands (zero energy). + for (int i = bands_energy.size(); i < kNumBands; ++i) { + log_bands_energy[i] = smooth(kLogOneByHundred); + } +} + +std::array ComputeDctTable() { + std::array dct_table; + const double k = std::sqrt(0.5); + for (int i = 0; i < kNumBands; ++i) { + for (int j = 0; j < kNumBands; ++j) + dct_table[i * kNumBands + j] = std::cos((i + 0.5) * j * kPi / kNumBands); + dct_table[i * kNumBands] *= k; + } + return dct_table; +} + +void ComputeDct(rtc::ArrayView in, + rtc::ArrayView dct_table, + rtc::ArrayView out) { + // DCT scaling factor - i.e., sqrt(2 / kNumBands). + constexpr float kDctScalingFactor = 0.301511345f; + constexpr float kDctScalingFactorError = + kDctScalingFactor * kDctScalingFactor - + 2.f / static_cast(kNumBands); + static_assert( + (kDctScalingFactorError >= 0.f && kDctScalingFactorError < 1e-1f) || + (kDctScalingFactorError < 0.f && kDctScalingFactorError > -1e-1f), + "kNumBands changed and kDctScalingFactor has not been updated."); + RTC_DCHECK_NE(in.data(), out.data()) << "In-place DCT is not supported."; + RTC_DCHECK_LE(in.size(), kNumBands); + RTC_DCHECK_LE(1, out.size()); + RTC_DCHECK_LE(out.size(), in.size()); + for (int i = 0; rtc::SafeLt(i, out.size()); ++i) { + out[i] = 0.f; + for (int j = 0; rtc::SafeLt(j, in.size()); ++j) { + out[i] += in[j] * dct_table[j * kNumBands + i]; + } + // TODO(bugs.webrtc.org/10480): Scaling factor in the DCT table. + out[i] *= kDctScalingFactor; + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h new file mode 100644 index 0000000000..f4b293a567 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_ + +#include + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" + +namespace webrtc { +namespace rnn_vad { + +// At a sample rate of 24 kHz, the last 3 Opus bands are beyond the Nyquist +// frequency. However, band #19 gets the contributions from band #18 because +// of the symmetric triangular filter with peak response at 12 kHz. +constexpr int kOpusBands24kHz = 20; +static_assert(kOpusBands24kHz < kNumBands, + "The number of bands at 24 kHz must be less than those defined " + "in the Opus scale at 48 kHz."); + +// Number of FFT frequency bins covered by each band in the Opus scale at a +// sample rate of 24 kHz for 20 ms frames. +// Declared here for unit testing. +constexpr std::array GetOpusScaleNumBins24kHz20ms() { + return {4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 24, 24, 32, 48}; +} + +// TODO(bugs.webrtc.org/10480): Move to a separate file. +// Class to compute band-wise spectral features in the Opus perceptual scale +// for 20 ms frames sampled at 24 kHz. The analysis methods apply triangular +// filters with peak response at the each band boundary. +class SpectralCorrelator { + public: + // Ctor. + SpectralCorrelator(); + SpectralCorrelator(const SpectralCorrelator&) = delete; + SpectralCorrelator& operator=(const SpectralCorrelator&) = delete; + ~SpectralCorrelator(); + + // Computes the band-wise spectral auto-correlations. + // `x` must: + // - have size equal to `kFrameSize20ms24kHz`; + // - be encoded as vectors of interleaved real-complex FFT coefficients + // where x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted). + void ComputeAutoCorrelation( + rtc::ArrayView x, + rtc::ArrayView auto_corr) const; + + // Computes the band-wise spectral cross-correlations. + // `x` and `y` must: + // - have size equal to `kFrameSize20ms24kHz`; + // - be encoded as vectors of interleaved real-complex FFT coefficients where + // x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted). + void ComputeCrossCorrelation( + rtc::ArrayView x, + rtc::ArrayView y, + rtc::ArrayView cross_corr) const; + + private: + const std::vector weights_; // Weights for each Fourier coefficient. +}; + +// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in +// spectral_features.cc. Given a vector of Opus-bands energy coefficients, +// computes the log magnitude spectrum applying smoothing both over time and +// over frequency. Declared here for unit testing. +void ComputeSmoothedLogMagnitudeSpectrum( + rtc::ArrayView bands_energy, + rtc::ArrayView log_bands_energy); + +// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in +// spectral_features.cc. Creates a DCT table for arrays having size equal to +// `kNumBands`. Declared here for unit testing. +std::array ComputeDctTable(); + +// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in +// spectral_features.cc. Computes DCT for `in` given a pre-computed DCT table. +// In-place computation is not allowed and `out` can be smaller than `in` in +// order to only compute the first DCT coefficients. Declared here for unit +// testing. 
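+// Usage sketch (hypothetical: `log_e` holds kNumBands log band energies and
+// only the first kNumLowerBands cepstral coefficients are requested):
+//
+//   const std::array<float, kNumBands * kNumBands> dct_table =
+//       ComputeDctTable();
+//   std::array<float, kNumLowerBands> cepstrum;
+//   ComputeDct(log_e, dct_table, cepstrum);  // out.size() < in.size() is OK.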
+void ComputeDct(rtc::ArrayView<const float> in,
+                rtc::ArrayView<const float, kNumBands * kNumBands> dct_table,
+                rtc::ArrayView<float> out);
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc
new file mode 100644
index 0000000000..ece4eb5024
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <numeric>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "modules/audio_processing/utility/pffft_wrapper.h"
+#include "rtc_base/numerics/safe_compare.h"
+// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
+// #include "test/fpe_observer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+// Generates the values for the array named `kOpusBandWeights24kHz20ms` in the
+// anonymous namespace of the .cc file, which is the array of FFT coefficient
+// weights for the Opus scale triangular filters.
+std::vector<float> ComputeTriangularFiltersWeights() {
+  constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms();
+  const auto& v = kOpusScaleNumBins24kHz20ms;  // Alias.
+  const int num_weights = std::accumulate(kOpusScaleNumBins24kHz20ms.begin(),
+                                          kOpusScaleNumBins24kHz20ms.end(), 0);
+  std::vector<float> weights(num_weights);
+  int next_fft_coeff_index = 0;
+  for (int band = 0; rtc::SafeLt(band, v.size()); ++band) {
+    const int band_size = v[band];
+    for (int j = 0; rtc::SafeLt(j, band_size); ++j) {
+      weights[next_fft_coeff_index + j] = static_cast<float>(j) / band_size;
+    }
+    next_fft_coeff_index += band_size;
+  }
+  return weights;
+}
+
+// Checks that the values returned by GetOpusScaleNumBins24kHz20ms() match the
+// Opus scale frequency boundaries.
+TEST(RnnVadTest, TestOpusScaleBoundaries) {
+  constexpr int kBandFrequencyBoundariesHz[kNumBands - 1] = {
+      200,  400,  600,  800,  1000, 1200, 1400, 1600,  2000,  2400, 2800,
+      3200, 4000, 4800, 5600, 6800, 8000, 9600, 12000, 15600, 20000};
+  constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms();
+  int prev = 0;
+  for (int i = 0; rtc::SafeLt(i, kOpusScaleNumBins24kHz20ms.size()); ++i) {
+    int boundary =
+        kBandFrequencyBoundariesHz[i] * kFrameSize20ms24kHz / kSampleRate24kHz;
+    EXPECT_EQ(kOpusScaleNumBins24kHz20ms[i], boundary - prev);
+    prev = boundary;
+  }
+}
+
+// Checks that the computed triangular filter weights for the Opus scale are
+// monotonic within each Opus band. This test should only be enabled when
+// ComputeTriangularFiltersWeights() is changed and `kOpusBandWeights24kHz20ms`
+// is updated accordingly.
+TEST(RnnVadTest, DISABLED_TestOpusScaleWeights) { + auto weights = ComputeTriangularFiltersWeights(); + int i = 0; + for (int band_size : GetOpusScaleNumBins24kHz20ms()) { + SCOPED_TRACE(band_size); + rtc::ArrayView band_weights(weights.data() + i, band_size); + float prev = -1.f; + for (float weight : band_weights) { + EXPECT_LT(prev, weight); + prev = weight; + } + i += band_size; + } +} + +// Checks that the computed band-wise auto-correlation is non-negative for a +// simple input vector of FFT coefficients. +TEST(RnnVadTest, SpectralCorrelatorValidOutput) { + // Input: vector of (1, 1j) values. + Pffft fft(kFrameSize20ms24kHz, Pffft::FftType::kReal); + auto in = fft.CreateBuffer(); + std::array out; + auto in_view = in->GetView(); + std::fill(in_view.begin(), in_view.end(), 1.f); + in_view[1] = 0.f; // Nyquist frequency. + // Compute and check output. + SpectralCorrelator e; + e.ComputeAutoCorrelation(in_view, out); + for (int i = 0; i < kOpusBands24kHz; ++i) { + SCOPED_TRACE(i); + EXPECT_GT(out[i], 0.f); + } +} + +// Checks that the computed smoothed log magnitude spectrum is within tolerance +// given hard-coded test input data. +TEST(RnnVadTest, ComputeSmoothedLogMagnitudeSpectrumWithinTolerance) { + constexpr std::array input = { + {86.060539245605f, 275.668334960938f, 43.406528472900f, 6.541896820068f, + 17.964015960693f, 8.090919494629f, 1.261920094490f, 1.212702631950f, + 1.619154453278f, 0.508935272694f, 0.346316039562f, 0.237035423517f, + 0.172424271703f, 0.271657168865f, 0.126088857651f, 0.139967113733f, + 0.207200810313f, 0.155893072486f, 0.091090843081f, 0.033391401172f, + 0.013879744336f, 0.011973354965f}}; + constexpr std::array expected_output = { + {1.934854507446f, 2.440402746201f, 1.637655138969f, 0.816367030144f, + 1.254645109177f, 0.908534288406f, 0.104459829628f, 0.087320849299f, + 0.211962252855f, -0.284886807203f, -0.448164641857f, -0.607240796089f, + -0.738917350769f, -0.550279200077f, -0.866177439690f, -0.824003994465f, + -0.663138568401f, -0.780171751976f, -0.995288193226f, -1.362596273422f, + -1.621970295906f, -1.658103585243f}}; + std::array computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + ComputeSmoothedLogMagnitudeSpectrum(input, computed_output); + ExpectNearAbsolute(expected_output, computed_output, 1e-5f); + } +} + +// Checks that the computed DCT is within tolerance given hard-coded test input +// data. +TEST(RnnVadTest, ComputeDctWithinTolerance) { + constexpr std::array input = { + {0.232155621052f, 0.678957760334f, 0.220818966627f, -0.077363930643f, + -0.559227049351f, 0.432545185089f, 0.353900641203f, 0.398993015289f, + 0.409774333239f, 0.454977899790f, 0.300520688295f, -0.010286616161f, + 0.272525429726f, 0.098067551851f, 0.083649002016f, 0.046226885170f, + -0.033228103071f, 0.144773483276f, -0.117661058903f, -0.005628800020f, + -0.009547689930f, -0.045382082462f}}; + constexpr std::array expected_output = { + {0.697072803974f, 0.442710995674f, -0.293156713247f, -0.060711503029f, + 0.292050391436f, 0.489301353693f, 0.402255415916f, 0.134404733777f, + -0.086305990815f, -0.199605688453f, -0.234511867166f, -0.413774639368f, + -0.388507157564f, -0.032798115164f, 0.044605545700f, 0.112466648221f, + -0.050096966326f, 0.045971218497f, -0.029815061018f, -0.410366982222f, + -0.209233760834f, -0.128037497401f}}; + auto dct_table = ComputeDctTable(); + std::array computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. 
+ // FloatingPointExceptionObserver fpe_observer; + ComputeDct(input, dct_table, computed_output); + ExpectNearAbsolute(expected_output, computed_output, 1e-5f); + } +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc new file mode 100644 index 0000000000..324d694957 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/spectral_features.h" + +#include + +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kTestFeatureVectorSize = kNumBands + 3 * kNumLowerBands + 1; + +// Writes non-zero sample values. +void WriteTestData(rtc::ArrayView samples) { + for (int i = 0; rtc::SafeLt(i, samples.size()); ++i) { + samples[i] = i % 100; + } +} + +rtc::ArrayView GetHigherBandsSpectrum( + std::array* feature_vector) { + return {feature_vector->data() + kNumLowerBands, kNumBands - kNumLowerBands}; +} + +rtc::ArrayView GetAverage( + std::array* feature_vector) { + return {feature_vector->data(), kNumLowerBands}; +} + +rtc::ArrayView GetFirstDerivative( + std::array* feature_vector) { + return {feature_vector->data() + kNumBands, kNumLowerBands}; +} + +rtc::ArrayView GetSecondDerivative( + std::array* feature_vector) { + return {feature_vector->data() + kNumBands + kNumLowerBands, kNumLowerBands}; +} + +rtc::ArrayView GetCepstralCrossCorrelation( + std::array* feature_vector) { + return {feature_vector->data() + kNumBands + 2 * kNumLowerBands, + kNumLowerBands}; +} + +float* GetCepstralVariability( + std::array* feature_vector) { + return feature_vector->data() + kNumBands + 3 * kNumLowerBands; +} + +constexpr float kInitialFeatureVal = -9999.f; + +// Checks that silence is detected when the input signal is 0 and that the +// feature vector is written only if the input signal is not tagged as silence. +TEST(RnnVadTest, SpectralFeaturesWithAndWithoutSilence) { + // Initialize. + SpectralFeaturesExtractor sfe; + std::array samples; + rtc::ArrayView samples_view(samples); + bool is_silence; + std::array feature_vector; + + // Write an initial value in the feature vector to detect changes. + std::fill(feature_vector.begin(), feature_vector.end(), kInitialFeatureVal); + + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + + // With silence. 
+ std::fill(samples.begin(), samples.end(), 0.f); + is_silence = sfe.CheckSilenceComputeFeatures( + samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector), + GetAverage(&feature_vector), GetFirstDerivative(&feature_vector), + GetSecondDerivative(&feature_vector), + GetCepstralCrossCorrelation(&feature_vector), + GetCepstralVariability(&feature_vector)); + // Silence is expected, the output won't be overwritten. + EXPECT_TRUE(is_silence); + EXPECT_TRUE(std::all_of(feature_vector.begin(), feature_vector.end(), + [](float x) { return x == kInitialFeatureVal; })); + + // With no silence. + WriteTestData(samples); + is_silence = sfe.CheckSilenceComputeFeatures( + samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector), + GetAverage(&feature_vector), GetFirstDerivative(&feature_vector), + GetSecondDerivative(&feature_vector), + GetCepstralCrossCorrelation(&feature_vector), + GetCepstralVariability(&feature_vector)); + // Silence is not expected, the output will be overwritten. + EXPECT_FALSE(is_silence); + EXPECT_FALSE(std::all_of(feature_vector.begin(), feature_vector.end(), + [](float x) { return x == kInitialFeatureVal; })); +} + +// Feeds a constant input signal and checks that: +// - the cepstral coefficients average does not change; +// - the derivatives are zero; +// - the cepstral variability score does not change. +TEST(RnnVadTest, CepstralFeaturesConstantAverageZeroDerivative) { + // Initialize. + SpectralFeaturesExtractor sfe; + std::array samples; + rtc::ArrayView samples_view(samples); + WriteTestData(samples); + + // Fill the spectral features with test data. + std::array feature_vector; + for (int i = 0; i < kCepstralCoeffsHistorySize; ++i) { + sfe.CheckSilenceComputeFeatures( + samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector), + GetAverage(&feature_vector), GetFirstDerivative(&feature_vector), + GetSecondDerivative(&feature_vector), + GetCepstralCrossCorrelation(&feature_vector), + GetCepstralVariability(&feature_vector)); + } + + // Feed the test data one last time but using a different output vector. + std::array feature_vector_last; + sfe.CheckSilenceComputeFeatures( + samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector_last), + GetAverage(&feature_vector_last), + GetFirstDerivative(&feature_vector_last), + GetSecondDerivative(&feature_vector_last), + GetCepstralCrossCorrelation(&feature_vector_last), + GetCepstralVariability(&feature_vector_last)); + + // Average is unchanged. + ExpectEqualFloatArray({feature_vector.data(), kNumLowerBands}, + {feature_vector_last.data(), kNumLowerBands}); + // First and second derivatives are zero. + constexpr std::array zeros{}; + ExpectEqualFloatArray( + {feature_vector_last.data() + kNumBands, kNumLowerBands}, zeros); + ExpectEqualFloatArray( + {feature_vector_last.data() + kNumBands + kNumLowerBands, kNumLowerBands}, + zeros); + // Variability is unchanged. + EXPECT_FLOAT_EQ(feature_vector[kNumBands + 3 * kNumLowerBands], + feature_vector_last[kNumBands + 3 * kNumLowerBands]); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h new file mode 100644 index 0000000000..d186479551 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SYMMETRIC_MATRIX_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SYMMETRIC_MATRIX_BUFFER_H_ + +#include +#include +#include +#include + +#include "api/array_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { + +// Data structure to buffer the results of pair-wise comparisons between items +// stored in a ring buffer. Every time that the oldest item is replaced in the +// ring buffer, the new one is compared to the remaining items in the ring +// buffer. The results of such comparisons need to be buffered and automatically +// removed when one of the two corresponding items that have been compared is +// removed from the ring buffer. It is assumed that the comparison is symmetric +// and that comparing an item with itself is not needed. +template +class SymmetricMatrixBuffer { + static_assert(S > 2, ""); + + public: + SymmetricMatrixBuffer() = default; + SymmetricMatrixBuffer(const SymmetricMatrixBuffer&) = delete; + SymmetricMatrixBuffer& operator=(const SymmetricMatrixBuffer&) = delete; + ~SymmetricMatrixBuffer() = default; + // Sets the buffer values to zero. + void Reset() { + static_assert(std::is_arithmetic::value, + "Integral or floating point required."); + buf_.fill(0); + } + // Pushes the results from the comparison between the most recent item and + // those that are still in the ring buffer. The first element in `values` must + // correspond to the comparison between the most recent item and the second + // most recent one in the ring buffer, whereas the last element in `values` + // must correspond to the comparison between the most recent item and the + // oldest one in the ring buffer. + void Push(rtc::ArrayView values) { + // Move the lower-right sub-matrix of size (S-2) x (S-2) one row up and one + // column left. + std::memmove(buf_.data(), buf_.data() + S, (buf_.size() - S) * sizeof(T)); + // Copy new values in the last column in the right order. + for (int i = 0; rtc::SafeLt(i, values.size()); ++i) { + const int index = (S - 1 - i) * (S - 1) - 1; + RTC_DCHECK_GE(index, 0); + RTC_DCHECK_LT(index, buf_.size()); + buf_[index] = values[i]; + } + } + // Reads the value that corresponds to comparison of two items in the ring + // buffer having delay `delay1` and `delay2`. The two arguments must not be + // equal and both must be in {0, ..., S - 1}. + T GetValue(int delay1, int delay2) const { + int row = S - 1 - delay1; + int col = S - 1 - delay2; + RTC_DCHECK_NE(row, col) << "The diagonal cannot be accessed."; + if (row > col) + std::swap(row, col); // Swap to access the upper-right triangular part. + RTC_DCHECK_LE(0, row); + RTC_DCHECK_LT(row, S - 1) << "Not enforcing row < col and row != col."; + RTC_DCHECK_LE(1, col) << "Not enforcing row < col and row != col."; + RTC_DCHECK_LT(col, S); + const int index = row * (S - 1) + (col - 1); + RTC_DCHECK_LE(0, index); + RTC_DCHECK_LT(index, buf_.size()); + return buf_[index]; + } + + private: + // Encode an upper-right triangular matrix (excluding its diagonal) using a + // square matrix. This allows to move the data in Push() with one single + // operation. 
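+  // (Illustrative index computation, assuming S = 4: GetValue(/*delay1=*/2,
+  // /*delay2=*/0) maps to row = S - 1 - 2 = 1 and col = S - 1 - 0 = 3, hence
+  // index = row * (S - 1) + (col - 1) = 1 * 3 + 2 = 5 in the flat
+  // (S - 1) x (S - 1) = 3 x 3 array; the unused cells are the price paid for
+  // the single memmove in Push().)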
+  std::array<T, (S - 1) * (S - 1)> buf_{};
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SYMMETRIC_MATRIX_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc
new file mode 100644
index 0000000000..1509ca5ac1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h"
+
+#include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+template <typename T, int S>
+void CheckSymmetry(const SymmetricMatrixBuffer<T, S>* sym_matrix_buf) {
+  for (int row = 0; row < S - 1; ++row)
+    for (int col = row + 1; col < S; ++col)
+      EXPECT_EQ(sym_matrix_buf->GetValue(row, col),
+                sym_matrix_buf->GetValue(col, row));
+}
+
+using PairType = std::pair<int, int>;
+
+// Checks that the symmetric matrix buffer contains at least one pair that
+// includes the given value.
+template <int S>
+bool CheckPairsWithValueExist(
+    const SymmetricMatrixBuffer<PairType, S>* sym_matrix_buf,
+    const int value) {
+  for (int row = 0; row < S - 1; ++row) {
+    for (int col = row + 1; col < S; ++col) {
+      auto p = sym_matrix_buf->GetValue(row, col);
+      if (p.first == value || p.second == value)
+        return true;
+    }
+  }
+  return false;
+}
+
+// Test that shows how to combine RingBuffer and SymmetricMatrixBuffer to
+// efficiently compute pair-wise scores. This test verifies that the evolution
+// of a SymmetricMatrixBuffer instance follows that of RingBuffer.
+TEST(RnnVadTest, SymmetricMatrixBufferUseCase) {
+  // Instantiate a ring buffer which will be fed with a series of integer
+  // values.
+  constexpr int kRingBufSize = 10;
+  RingBuffer<int, 1, kRingBufSize> ring_buf;
+  // Instantiate a symmetric matrix buffer for the ring buffer above. It
+  // stores pairs of integers with which this test can easily check that the
+  // evolution of RingBuffer and SymmetricMatrixBuffer match.
+  SymmetricMatrixBuffer<PairType, kRingBufSize> sym_matrix_buf;
+  for (int t = 1; t <= 100; ++t) {  // Evolution steps.
+    SCOPED_TRACE(t);
+    const int t_removed = ring_buf.GetArrayView(kRingBufSize - 1)[0];
+    ring_buf.Push({&t, 1});
+    // The head of the ring buffer is `t`.
+    ASSERT_EQ(t, ring_buf.GetArrayView(0)[0]);
+    // Create the comparisons between `t` and the older elements in the ring
+    // buffer.
+    std::array<PairType, kRingBufSize - 1> new_comparisons;
+    for (int i = 0; i < kRingBufSize - 1; ++i) {
+      // Start comparing `t` to the second newest element in the ring buffer.
+      const int delay = i + 1;
+      const auto t_prev = ring_buf.GetArrayView(delay)[0];
+      ASSERT_EQ(std::max(0, t - delay), t_prev);
+      // Compare the last element `t` with `t_prev`.
+      new_comparisons[i].first = t_prev;
+      new_comparisons[i].second = t;
+    }
+    // Push the new comparisons into the symmetric matrix buffer.
+    sym_matrix_buf.Push({new_comparisons.data(), new_comparisons.size()});
+    // Tests.
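+    // The checks below verify, after every push, that (i) GetValue() is
+    // symmetric in its arguments, (ii) each pair sits at the position implied
+    // by its two delays, (iii) the surviving ring buffer items still have
+    // their pairs, and (iv) the pairs of the evicted item are gone.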
+    CheckSymmetry(&sym_matrix_buf);
+    // Check that the pairs resulting from the content in the ring buffer are
+    // in the right position.
+    for (int delay1 = 0; delay1 < kRingBufSize - 1; ++delay1) {
+      for (int delay2 = delay1 + 1; delay2 < kRingBufSize; ++delay2) {
+        const auto t1 = ring_buf.GetArrayView(delay1)[0];
+        const auto t2 = ring_buf.GetArrayView(delay2)[0];
+        ASSERT_LE(t2, t1);
+        const auto p = sym_matrix_buf.GetValue(delay1, delay2);
+        EXPECT_EQ(p.first, t2);
+        EXPECT_EQ(p.second, t1);
+      }
+    }
+    // Check that every older element in the ring buffer still has a
+    // corresponding pair in the symmetric matrix buffer.
+    for (int delay = 1; delay < kRingBufSize; ++delay) {
+      const auto t_prev = ring_buf.GetArrayView(delay)[0];
+      EXPECT_TRUE(CheckPairsWithValueExist(&sym_matrix_buf, t_prev));
+    }
+    // Check that the element removed from the ring buffer has no
+    // corresponding pairs left in the symmetric matrix buffer.
+    if (t > kRingBufSize - 1) {
+      EXPECT_FALSE(CheckPairsWithValueExist(&sym_matrix_buf, t_removed));
+    }
+  }
+}
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc
new file mode 100644
index 0000000000..857a9f2706
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc
@@ -0,0 +1,143 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+
+#include <algorithm>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+// File reader for binary files that contain a sequence of values with
+// arithmetic type `T`. The values of type `T` that are read are cast to
+// float.
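+// Illustrative usage sketch (the file name below is arbitrary), reading PCM
+// S16 samples as floats, one 10-value chunk at a time:
+//   FloatFileReader<int16_t> reader("samples.pcm");
+//   std::array<float, 10> chunk;
+//   while (reader.ReadChunk(chunk)) { /* Process `chunk`. */ }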
+template <typename T>
+class FloatFileReader : public FileReader {
+ public:
+  static_assert(std::is_arithmetic<T>::value, "");
+  explicit FloatFileReader(absl::string_view filename)
+      : is_(std::string(filename), std::ios::binary | std::ios::ate),
+        size_(is_.tellg() / sizeof(T)) {
+    RTC_CHECK(is_);
+    SeekBeginning();
+  }
+  FloatFileReader(const FloatFileReader&) = delete;
+  FloatFileReader& operator=(const FloatFileReader&) = delete;
+  ~FloatFileReader() = default;
+
+  int size() const override { return size_; }
+  bool ReadChunk(rtc::ArrayView<float> dst) override {
+    const std::streamsize bytes_to_read = dst.size() * sizeof(T);
+    if (std::is_same<T, float>::value) {
+      is_.read(reinterpret_cast<char*>(dst.data()), bytes_to_read);
+    } else {
+      buffer_.resize(dst.size());
+      is_.read(reinterpret_cast<char*>(buffer_.data()), bytes_to_read);
+      std::transform(buffer_.begin(), buffer_.end(), dst.begin(),
+                     [](const T& v) -> float { return static_cast<float>(v); });
+    }
+    return is_.gcount() == bytes_to_read;
+  }
+  bool ReadValue(float& dst) override { return ReadChunk({&dst, 1}); }
+  void SeekForward(int hop) override { is_.seekg(hop * sizeof(T), is_.cur); }
+  void SeekBeginning() override { is_.seekg(0, is_.beg); }
+
+ private:
+  std::ifstream is_;
+  const int size_;
+  std::vector<T> buffer_;
+};
+
+}  // namespace
+
+using webrtc::test::ResourcePath;
+
+void ExpectEqualFloatArray(rtc::ArrayView<const float> expected,
+                           rtc::ArrayView<const float> computed) {
+  ASSERT_EQ(expected.size(), computed.size());
+  for (int i = 0; rtc::SafeLt(i, expected.size()); ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_FLOAT_EQ(expected[i], computed[i]);
+  }
+}
+
+void ExpectNearAbsolute(rtc::ArrayView<const float> expected,
+                        rtc::ArrayView<const float> computed,
+                        float tolerance) {
+  ASSERT_EQ(expected.size(), computed.size());
+  for (int i = 0; rtc::SafeLt(i, expected.size()); ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_NEAR(expected[i], computed[i], tolerance);
+  }
+}
+
+std::unique_ptr<FileReader> CreatePcmSamplesReader() {
+  return std::make_unique<FloatFileReader<int16_t>>(
+      /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/samples",
+                                      "pcm"));
+}
+
+ChunksFileReader CreatePitchBuffer24kHzReader() {
+  auto reader = std::make_unique<FloatFileReader<float>>(
+      /*filename=*/test::ResourcePath(
+          "audio_processing/agc2/rnn_vad/pitch_buf_24k", "dat"));
+  const int num_chunks = rtc::CheckedDivExact(reader->size(), kBufSize24kHz);
+  return {/*chunk_size=*/kBufSize24kHz, num_chunks, std::move(reader)};
+}
+
+ChunksFileReader CreateLpResidualAndPitchInfoReader() {
+  constexpr int kPitchInfoSize = 2;  // Pitch period and strength.
+  constexpr int kChunkSize = kBufSize24kHz + kPitchInfoSize;
+  auto reader = std::make_unique<FloatFileReader<float>>(
+      /*filename=*/test::ResourcePath(
+          "audio_processing/agc2/rnn_vad/pitch_lp_res", "dat"));
+  const int num_chunks = rtc::CheckedDivExact(reader->size(), kChunkSize);
+  return {kChunkSize, num_chunks, std::move(reader)};
+}
+
+std::unique_ptr<FileReader> CreateGruInputReader() {
+  return std::make_unique<FloatFileReader<float>>(
+      /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/gru_in",
+                                      "dat"));
+}
+
+std::unique_ptr<FileReader> CreateVadProbsReader() {
+  return std::make_unique<FloatFileReader<float>>(
+      /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/vad_prob",
+                                      "dat"));
+}
+
+PitchTestData::PitchTestData() {
+  FloatFileReader<int32_t> reader(
+      /*filename=*/ResourcePath(
+          "audio_processing/agc2/rnn_vad/pitch_search_int", "dat"));
+  reader.ReadChunk(pitch_buffer_24k_);
+  reader.ReadChunk(square_energies_24k_);
+  reader.ReadChunk(auto_correlation_12k_);
+  // Reverse the order of the squared energy values.
+  // Required after the WebRTC CL 191703 which switched to forward
+  // computation.
+  std::reverse(square_energies_24k_.begin(), square_energies_24k_.end());
+}
+
+PitchTestData::~PitchTestData() = default;
+
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h
new file mode 100644
index 0000000000..e64b7b7ecd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h
@@ -0,0 +1,130 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_TEST_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_TEST_UTILS_H_
+
+#include <array>
+#include <fstream>
+#include <limits>
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/rnn_vad/common.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+constexpr float kFloatMin = std::numeric_limits<float>::min();
+
+// Fails for every pair from two equally sized rtc::ArrayView<float> views
+// such that the values in the pair do not match.
+void ExpectEqualFloatArray(rtc::ArrayView<const float> expected,
+                           rtc::ArrayView<const float> computed);
+
+// Fails for every pair from two equally sized rtc::ArrayView<float> views
+// such that their absolute error is above a given threshold.
+void ExpectNearAbsolute(rtc::ArrayView<const float> expected,
+                        rtc::ArrayView<const float> computed,
+                        float tolerance);
+
+// File reader interface.
+class FileReader {
+ public:
+  virtual ~FileReader() = default;
+  // Number of values in the file.
+  virtual int size() const = 0;
+  // Reads `dst.size()` float values into `dst`, advances the internal file
+  // position according to the number of read bytes and returns true if the
+  // values are correctly read. If the number of remaining bytes in the file
+  // is not sufficient to read `dst.size()` float values, `dst` is partially
+  // modified and false is returned.
+  virtual bool ReadChunk(rtc::ArrayView<float> dst) = 0;
+  // Reads a single float value, advances the internal file position
+  // according to the number of read bytes and returns true if the value is
+  // correctly read. If the number of remaining bytes in the file is not
+  // sufficient to read one float, `dst` is not modified and false is
+  // returned.
+  virtual bool ReadValue(float& dst) = 0;
+  // Advances the internal file position by `hop` float values.
+  virtual void SeekForward(int hop) = 0;
+  // Resets the internal file position to BOF.
+  virtual void SeekBeginning() = 0;
+};
+
+// File reader for files that contain `num_chunks` chunks with size equal to
+// `chunk_size`.
+struct ChunksFileReader {
+  const int chunk_size;
+  const int num_chunks;
+  std::unique_ptr<FileReader> reader;
+};
+
+// Creates a reader for the PCM S16 samples file.
+std::unique_ptr<FileReader> CreatePcmSamplesReader();
+
+// Creates a reader for the 24 kHz pitch buffer test data.
+ChunksFileReader CreatePitchBuffer24kHzReader();
+
+// Creates a reader for the LP residual and pitch information test data.
+ChunksFileReader CreateLpResidualAndPitchInfoReader();
+
+// Creates a reader for the sequence of GRU input vectors.
+std::unique_ptr<FileReader> CreateGruInputReader();
+
+// Creates a reader for the VAD probabilities test data.
+std::unique_ptr<FileReader> CreateVadProbsReader();
+
+// Class to retrieve a test pitch buffer content and the expected output for
+// the analysis steps.
+class PitchTestData {
+ public:
+  PitchTestData();
+  ~PitchTestData();
+  rtc::ArrayView<const float, kBufSize24kHz> PitchBuffer24kHzView() const {
+    return pitch_buffer_24k_;
+  }
+  rtc::ArrayView<const float, kNumPitchBufSquareEnergies>
+  SquareEnergies24kHzView() const {
+    return square_energies_24k_;
+  }
+  rtc::ArrayView<const float, kNumPitchBufAutoCorrCoeffs>
+  AutoCorrelation12kHzView() const {
+    return auto_correlation_12k_;
+  }
+
+ private:
+  std::array<float, kBufSize24kHz> pitch_buffer_24k_;
+  std::array<float, kNumPitchBufSquareEnergies> square_energies_24k_;
+  std::array<float, kNumPitchBufAutoCorrCoeffs> auto_correlation_12k_;
+};
+
+// Writer for binary files.
+class FileWriter {
+ public:
+  explicit FileWriter(absl::string_view file_path)
+      : os_(std::string(file_path), std::ios::binary) {}
+  FileWriter(const FileWriter&) = delete;
+  FileWriter& operator=(const FileWriter&) = delete;
+  ~FileWriter() = default;
+  void WriteChunk(rtc::ArrayView<const float> value) {
+    const std::streamsize bytes_to_write = value.size() * sizeof(float);
+    os_.write(reinterpret_cast<const char*>(value.data()), bytes_to_write);
+  }
+
+ private:
+  std::ofstream os_;
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_TEST_UTILS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h
new file mode 100644
index 0000000000..47f681196a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h
@@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_VECTOR_MATH_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_VECTOR_MATH_H_
+
+// Defines WEBRTC_ARCH_X86_FAMILY, used below.
+#include "rtc_base/system/arch.h"
+
+#if defined(WEBRTC_HAS_NEON)
+#include <arm_neon.h>
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+
+#include <numeric>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Provides optimizations for mathematical operations having vectors as
+// operand(s).
+class VectorMath {
+ public:
+  explicit VectorMath(AvailableCpuFeatures cpu_features)
+      : cpu_features_(cpu_features) {}
+
+  // Computes the dot product between two equally sized vectors.
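+  // The implementation is picked at run time based on `cpu_features_`: AVX2
+  // or SSE2 on x86, NEON on arm64, with std::inner_product as the portable
+  // fallback. Typical usage (see vector_math_unittest.cc):
+  //   VectorMath vector_math(GetAvailableCpuFeatures());
+  //   const float energy = vector_math.DotProduct(x, x);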
+  float DotProduct(rtc::ArrayView<const float> x,
+                   rtc::ArrayView<const float> y) const {
+    RTC_DCHECK_EQ(x.size(), y.size());
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    if (cpu_features_.avx2) {
+      return DotProductAvx2(x, y);
+    } else if (cpu_features_.sse2) {
+      __m128 accumulator = _mm_setzero_ps();
+      constexpr int kBlockSizeLog2 = 2;
+      constexpr int kBlockSize = 1 << kBlockSizeLog2;
+      const int incomplete_block_index = (x.size() >> kBlockSizeLog2)
+                                         << kBlockSizeLog2;
+      for (int i = 0; i < incomplete_block_index; i += kBlockSize) {
+        RTC_DCHECK_LE(i + kBlockSize, x.size());
+        const __m128 x_i = _mm_loadu_ps(&x[i]);
+        const __m128 y_i = _mm_loadu_ps(&y[i]);
+        // Multiply-add.
+        const __m128 z_j = _mm_mul_ps(x_i, y_i);
+        accumulator = _mm_add_ps(accumulator, z_j);
+      }
+      // Reduce `accumulator` by addition.
+      __m128 high = _mm_movehl_ps(accumulator, accumulator);
+      accumulator = _mm_add_ps(accumulator, high);
+      high = _mm_shuffle_ps(accumulator, accumulator, 1);
+      accumulator = _mm_add_ps(accumulator, high);
+      float dot_product = _mm_cvtss_f32(accumulator);
+      // Add the result for the last block if incomplete.
+      for (int i = incomplete_block_index;
+           i < rtc::dchecked_cast<int>(x.size()); ++i) {
+        dot_product += x[i] * y[i];
+      }
+      return dot_product;
+    }
+#elif defined(WEBRTC_HAS_NEON) && defined(WEBRTC_ARCH_ARM64)
+    if (cpu_features_.neon) {
+      float32x4_t accumulator = vdupq_n_f32(0.f);
+      constexpr int kBlockSizeLog2 = 2;
+      constexpr int kBlockSize = 1 << kBlockSizeLog2;
+      const int incomplete_block_index = (x.size() >> kBlockSizeLog2)
+                                         << kBlockSizeLog2;
+      for (int i = 0; i < incomplete_block_index; i += kBlockSize) {
+        RTC_DCHECK_LE(i + kBlockSize, x.size());
+        const float32x4_t x_i = vld1q_f32(&x[i]);
+        const float32x4_t y_i = vld1q_f32(&y[i]);
+        accumulator = vfmaq_f32(accumulator, x_i, y_i);
+      }
+      // Reduce `accumulator` by addition.
+      const float32x2_t tmp =
+          vpadd_f32(vget_low_f32(accumulator), vget_high_f32(accumulator));
+      float dot_product = vget_lane_f32(vpadd_f32(tmp, vrev64_f32(tmp)), 0);
+      // Add the result for the last block if incomplete.
+      for (int i = incomplete_block_index;
+           i < rtc::dchecked_cast<int>(x.size()); ++i) {
+        dot_product += x[i] * y[i];
+      }
+      return dot_product;
+    }
+#endif
+    return std::inner_product(x.begin(), x.end(), y.begin(), 0.f);
+  }
+
+ private:
+  float DotProductAvx2(rtc::ArrayView<const float> x,
+                       rtc::ArrayView<const float> y) const;
+
+  const AvailableCpuFeatures cpu_features_;
+};
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_VECTOR_MATH_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc
new file mode 100644
index 0000000000..e4d246d9ab
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/vector_math.h"
+
+#include <immintrin.h>
+
+#include "api/array_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+float VectorMath::DotProductAvx2(rtc::ArrayView<const float> x,
+                                 rtc::ArrayView<const float> y) const {
+  RTC_DCHECK(cpu_features_.avx2);
+  RTC_DCHECK_EQ(x.size(), y.size());
+  __m256 accumulator = _mm256_setzero_ps();
+  constexpr int kBlockSizeLog2 = 3;
+  constexpr int kBlockSize = 1 << kBlockSizeLog2;
+  const int incomplete_block_index = (x.size() >> kBlockSizeLog2)
+                                     << kBlockSizeLog2;
+  for (int i = 0; i < incomplete_block_index; i += kBlockSize) {
+    RTC_DCHECK_LE(i + kBlockSize, x.size());
+    const __m256 x_i = _mm256_loadu_ps(&x[i]);
+    const __m256 y_i = _mm256_loadu_ps(&y[i]);
+    accumulator = _mm256_fmadd_ps(x_i, y_i, accumulator);
+  }
+  // Reduce `accumulator` by addition.
+  __m128 high = _mm256_extractf128_ps(accumulator, 1);
+  __m128 low = _mm256_extractf128_ps(accumulator, 0);
+  low = _mm_add_ps(high, low);
+  high = _mm_movehl_ps(high, low);
+  low = _mm_add_ps(high, low);
+  high = _mm_shuffle_ps(low, low, 1);
+  low = _mm_add_ss(high, low);
+  float dot_product = _mm_cvtss_f32(low);
+  // Add the result for the last block if incomplete.
+  for (int i = incomplete_block_index; i < rtc::dchecked_cast<int>(x.size());
+       ++i) {
+    dot_product += x[i] * y[i];
+  }
+  return dot_product;
+}
+
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build
new file mode 100644
index 0000000000..275c512cf6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build
@@ -0,0 +1,185 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +CXXFLAGS += [ + "-mavx2", + "-mfma" +] + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_AVX2"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_GNU_SOURCE"] = True + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = 
"0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + +Library("vector_math_avx2_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build new file mode 100644 index 0000000000..263ec679e3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build @@ -0,0 +1,216 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = 
True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("vector_math_gn") diff --git 
a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc
new file mode 100644
index 0000000000..45fd65d61e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/vector_math.h"
+
+#include <vector>
+
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+constexpr int kSizeOfX = 19;
+constexpr float kX[kSizeOfX] = {
+    0.31593041f, 0.9350786f,   -0.25252445f, -0.86956251f, -0.9673632f,
+    0.54571901f, -0.72504495f, -0.79509912f, -0.25525012f, -0.73340473f,
+    0.15747377f, -0.04370565f, 0.76135145f,  -0.57239645f, 0.68616848f,
+    0.3740298f,  0.34710799f,  -0.92207423f, 0.10738454f};
+constexpr int kSizeOfXSubSpan = 16;
+static_assert(kSizeOfXSubSpan < kSizeOfX, "");
+constexpr float kEnergyOfX = 7.315563958160327f;
+constexpr float kEnergyOfXSubspan = 6.333327669592963f;
+
+class VectorMathParametrization
+    : public ::testing::TestWithParam<AvailableCpuFeatures> {};
+
+TEST_P(VectorMathParametrization, TestDotProduct) {
+  VectorMath vector_math(/*cpu_features=*/GetParam());
+  EXPECT_FLOAT_EQ(vector_math.DotProduct(kX, kX), kEnergyOfX);
+  EXPECT_FLOAT_EQ(
+      vector_math.DotProduct({kX, kSizeOfXSubSpan}, {kX, kSizeOfXSubSpan}),
+      kEnergyOfXSubspan);
+}
+
+// Finds the relevant CPU features combinations to test.
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
+  std::vector<AvailableCpuFeatures> v;
+  v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false});
+  AvailableCpuFeatures available = GetAvailableCpuFeatures();
+  if (available.avx2) {
+    v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false});
+  }
+  if (available.sse2) {
+    v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
+  }
+  if (available.neon) {
+    v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true});
+  }
+  return v;
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    RnnVadTest,
+    VectorMathParametrization,
+    ::testing::ValuesIn(GetCpuFeaturesToTest()),
+    [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
+      return info.param.ToString();
+    });
+
+}  // namespace
+}  // namespace rnn_vad
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc
new file mode 100644
index 0000000000..961baf4cd3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc
@@ -0,0 +1,183 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/saturation_protector.h"
+
+#include <memory>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
+constexpr float kMinMarginDb = 12.0f;
+constexpr float kMaxMarginDb = 25.0f;
+constexpr float kAttack = 0.9988493699365052f;
+constexpr float kDecay = 0.9997697679981565f;
+
+// Saturation protector state. Defined outside of `SaturationProtectorImpl` to
+// implement checkpoint and restore ops.
+struct SaturationProtectorState {
+  bool operator==(const SaturationProtectorState& s) const {
+    return headroom_db == s.headroom_db &&
+           peak_delay_buffer == s.peak_delay_buffer &&
+           max_peaks_dbfs == s.max_peaks_dbfs &&
+           time_since_push_ms == s.time_since_push_ms;
+  }
+  inline bool operator!=(const SaturationProtectorState& s) const {
+    return !(*this == s);
+  }
+
+  float headroom_db;
+  SaturationProtectorBuffer peak_delay_buffer;
+  float max_peaks_dbfs;
+  int time_since_push_ms;  // Time since the last ring buffer push operation.
+};
+
+// Resets the saturation protector state.
+void ResetSaturationProtectorState(float initial_headroom_db,
+                                   SaturationProtectorState& state) {
+  state.headroom_db = initial_headroom_db;
+  state.peak_delay_buffer.Reset();
+  state.max_peaks_dbfs = kMinLevelDbfs;
+  state.time_since_push_ms = 0;
+}
+
+// Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
+// and the peak level `peak_dbfs` for an observed frame. `state` must not be
+// modified other than by calling this function.
+void UpdateSaturationProtectorState(float peak_dbfs,
+                                    float speech_level_dbfs,
+                                    SaturationProtectorState& state) {
+  // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
+  state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
+  state.time_since_push_ms += kFrameDurationMs;
+  if (rtc::SafeGt(state.time_since_push_ms,
+                  kPeakEnveloperSuperFrameLengthMs)) {
+    // Push `max_peaks_dbfs` back into the ring buffer.
+    state.peak_delay_buffer.PushBack(state.max_peaks_dbfs);
+    // Reset.
+    state.max_peaks_dbfs = kMinLevelDbfs;
+    state.time_since_push_ms = 0;
+  }
+
+  // Update the headroom by comparing the estimated speech level and the
+  // delayed max speech peak.
+  const float delayed_peak_dbfs =
+      state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
+  const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
+  if (difference_db > state.headroom_db) {
+    // Attack.
+    state.headroom_db =
+        state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
+  } else {
+    // Decay.
+    state.headroom_db =
+        state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
+  }
+
+  state.headroom_db =
+      rtc::SafeClamp(state.headroom_db, kMinMarginDb, kMaxMarginDb);
+}
+
+// Saturation protector which recommends a headroom based on the recent peaks.
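+// The headroom tracks `difference_db` with two one-pole smoothers: a faster
+// one when the difference grows (attack, `kAttack`) and a slower one when it
+// shrinks (decay, `kDecay`). As a rough figure, assuming 10 ms frames
+// (kFrameDurationMs), the attack coefficient above halves the remaining gap
+// in about 6 seconds and the decay coefficient in about 30 seconds.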
+class SaturationProtectorImpl : public SaturationProtector {
+ public:
+  explicit SaturationProtectorImpl(float initial_headroom_db,
+                                   int adjacent_speech_frames_threshold,
+                                   ApmDataDumper* apm_data_dumper)
+      : apm_data_dumper_(apm_data_dumper),
+        initial_headroom_db_(initial_headroom_db),
+        adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
+    Reset();
+  }
+  SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
+  SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
+  ~SaturationProtectorImpl() = default;
+
+  float HeadroomDb() override { return headroom_db_; }
+
+  void Analyze(float speech_probability,
+               float peak_dbfs,
+               float speech_level_dbfs) override {
+    if (speech_probability < kVadConfidenceThreshold) {
+      // Not a speech frame.
+      if (adjacent_speech_frames_threshold_ > 1) {
+        // When two or more adjacent speech frames are required in order to
+        // update the state, we need to decide whether to discard or confirm
+        // the updates based on the speech sequence length.
+        if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
+          // First non-speech frame after a long enough sequence of speech
+          // frames. Update the reliable state.
+          reliable_state_ = preliminary_state_;
+        } else if (num_adjacent_speech_frames_ > 0) {
+          // First non-speech frame after a sequence of speech frames that is
+          // too short. Reset to the last reliable state.
+          preliminary_state_ = reliable_state_;
+        }
+      }
+      num_adjacent_speech_frames_ = 0;
+    } else {
+      // Speech frame observed.
+      num_adjacent_speech_frames_++;
+
+      // Update the preliminary level estimate.
+      UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
+                                     preliminary_state_);
+
+      if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
+        // `preliminary_state_` is now reliable. Update the headroom.
+        headroom_db_ = preliminary_state_.headroom_db;
+      }
+    }
+    DumpDebugData();
+  }
+
+  void Reset() override {
+    num_adjacent_speech_frames_ = 0;
+    headroom_db_ = initial_headroom_db_;
+    ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
+    ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
+  }
+
+ private:
+  void DumpDebugData() {
+    apm_data_dumper_->DumpRaw(
+        "agc2_saturation_protector_preliminary_max_peak_dbfs",
+        preliminary_state_.max_peaks_dbfs);
+    apm_data_dumper_->DumpRaw(
+        "agc2_saturation_protector_reliable_max_peak_dbfs",
+        reliable_state_.max_peaks_dbfs);
+  }
+
+  ApmDataDumper* const apm_data_dumper_;
+  const float initial_headroom_db_;
+  const int adjacent_speech_frames_threshold_;
+  int num_adjacent_speech_frames_;
+  float headroom_db_;
+  SaturationProtectorState preliminary_state_;
+  SaturationProtectorState reliable_state_;
+};
+
+}  // namespace
+
+std::unique_ptr<SaturationProtector> CreateSaturationProtector(
+    float initial_headroom_db,
+    int adjacent_speech_frames_threshold,
+    ApmDataDumper* apm_data_dumper) {
+  return std::make_unique<SaturationProtectorImpl>(
+      initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h
new file mode 100644
index 0000000000..ef22145d5f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
+
+#include <memory>
+
+namespace webrtc {
+class ApmDataDumper;
+
+// Saturation protector. Analyzes peak levels and recommends a headroom to
+// reduce the chances of clipping.
+class SaturationProtector {
+ public:
+  virtual ~SaturationProtector() = default;
+
+  // Returns the recommended headroom in dB.
+  virtual float HeadroomDb() = 0;
+
+  // Analyzes the peak level of a 10 ms frame along with its speech
+  // probability and the current speech level estimate to update the
+  // recommended headroom.
+  virtual void Analyze(float speech_probability,
+                       float peak_dbfs,
+                       float speech_level_dbfs) = 0;
+
+  // Resets the internal state.
+  virtual void Reset() = 0;
+};
+
+// Creates a saturation protector that starts at `initial_headroom_db`.
+std::unique_ptr<SaturationProtector> CreateSaturationProtector(
+    float initial_headroom_db,
+    int adjacent_speech_frames_threshold,
+    ApmDataDumper* apm_data_dumper);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc
new file mode 100644
index 0000000000..41efdad2c8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
+
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+
+namespace webrtc {
+
+SaturationProtectorBuffer::SaturationProtectorBuffer() = default;
+
+SaturationProtectorBuffer::~SaturationProtectorBuffer() = default;
+
+bool SaturationProtectorBuffer::operator==(
+    const SaturationProtectorBuffer& b) const {
+  RTC_DCHECK_LE(size_, buffer_.size());
+  RTC_DCHECK_LE(b.size_, b.buffer_.size());
+  if (size_ != b.size_) {
+    return false;
+  }
+  for (int i = 0, i0 = FrontIndex(), i1 = b.FrontIndex(); i < size_;
+       ++i, ++i0, ++i1) {
+    if (buffer_[i0 % buffer_.size()] != b.buffer_[i1 % b.buffer_.size()]) {
+      return false;
+    }
+  }
+  return true;
+}
+
+int SaturationProtectorBuffer::Capacity() const {
+  return buffer_.size();
+}
+
+int SaturationProtectorBuffer::Size() const {
+  return size_;
+}
+
+void SaturationProtectorBuffer::Reset() {
+  next_ = 0;
+  size_ = 0;
+}
+
+void SaturationProtectorBuffer::PushBack(float v) {
+  RTC_DCHECK_GE(next_, 0);
+  RTC_DCHECK_GE(size_, 0);
+  RTC_DCHECK_LT(next_, buffer_.size());
+  RTC_DCHECK_LE(size_, buffer_.size());
+  buffer_[next_++] = v;
+  if (rtc::SafeEq(next_, buffer_.size())) {
+    next_ = 0;
+  }
+  if (rtc::SafeLt(size_, buffer_.size())) {
+    size_++;
+  }
+}
+
+absl::optional<float> SaturationProtectorBuffer::Front() const {
+  if (size_ == 0) {
+    return absl::nullopt;
+  }
+  RTC_DCHECK_LT(FrontIndex(), buffer_.size());
+  return buffer_[FrontIndex()];
+}
+
+int SaturationProtectorBuffer::FrontIndex() const {
+  return rtc::SafeEq(size_, buffer_.size()) ? next_ : 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.h
new file mode 100644
index 0000000000..e17d0998c4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.h
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_
+
+#include <array>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+
+namespace webrtc {
+
+// Ring buffer for the saturation protector which only supports (i) push back
+// and (ii) read oldest item.
+class SaturationProtectorBuffer {
+ public:
+  SaturationProtectorBuffer();
+  ~SaturationProtectorBuffer();
+
+  bool operator==(const SaturationProtectorBuffer& b) const;
+  inline bool operator!=(const SaturationProtectorBuffer& b) const {
+    return !(*this == b);
+  }
+
+  // Maximum number of values that the buffer can contain.
+  int Capacity() const;
+
+  // Number of values in the buffer.
+  int Size() const;
+
+  void Reset();
+
+  // Pushes back `v`. If the buffer is full, the oldest value is replaced.
+  void PushBack(float v);
+
+  // Returns the oldest item in the buffer. Returns an empty value if the
+  // buffer is empty.
+  absl::optional<float> Front() const;
+
+ private:
+  int FrontIndex() const;
+  // `buffer_` has `size_` elements (up to the size of `buffer_`) and `next_`
+  // is the position where the next new value is written in `buffer_`.
+  std::array<float, kSaturationProtectorBufferSize> buffer_;
+  int next_ = 0;
+  int size_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc
new file mode 100644
index 0000000000..22187bf027
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc
@@ -0,0 +1,73 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
+
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Optional;
+
+TEST(GainController2SaturationProtectorBuffer, Init) {
+  SaturationProtectorBuffer b;
+  EXPECT_EQ(b.Size(), 0);
+  EXPECT_FALSE(b.Front().has_value());
+}
+
+TEST(GainController2SaturationProtectorBuffer, PushBack) {
+  SaturationProtectorBuffer b;
+  constexpr float kValue = 123.0f;
+  b.PushBack(kValue);
+  EXPECT_EQ(b.Size(), 1);
+  EXPECT_THAT(b.Front(), Optional(Eq(kValue)));
+}
+
+TEST(GainController2SaturationProtectorBuffer, Reset) {
+  SaturationProtectorBuffer b;
+  b.PushBack(123.0f);
+  b.Reset();
+  EXPECT_EQ(b.Size(), 0);
+  EXPECT_FALSE(b.Front().has_value());
+}
+
+// Checks that the front value does not change until the ring buffer gets
+// full.
+TEST(GainController2SaturationProtectorBuffer, FrontUntilBufferIsFull) {
+  SaturationProtectorBuffer b;
+  constexpr float kValue = 123.0f;
+  b.PushBack(kValue);
+  for (int i = 1; i < b.Capacity(); ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_THAT(b.Front(), Optional(Eq(kValue)));
+    b.PushBack(kValue + i);
+  }
+}
+
+// Checks that when the buffer is full it behaves as a shift register.
+TEST(GainController2SaturationProtectorBuffer, FrontIsDelayed) {
+  SaturationProtectorBuffer b;
+  // Fill the buffer.
+  for (int i = 0; i < b.Capacity(); ++i) {
+    b.PushBack(i);
+  }
+  // The ring buffer should now behave as a shift register with a delay equal
+  // to its capacity.
+  for (int i = b.Capacity(); i < 2 * b.Capacity() + 1; ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_THAT(b.Front(), Optional(Eq(i - b.Capacity())));
+    b.PushBack(i);
+  }
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_gn/moz.build
new file mode 100644
index 0000000000..a1e6f309bf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_gn/moz.build
@@ -0,0 +1,234 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + 
DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("saturation_protector_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc new file mode 100644 index 0000000000..3b104be8cd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/saturation_protector.h" + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr float kInitialHeadroomDb = 20.0f; +constexpr int kNoAdjacentSpeechFramesRequired = 1; +constexpr float kMaxSpeechProbability = 1.0f; + +// Calls `Analyze(speech_probability, peak_dbfs, speech_level_dbfs)` +// `num_iterations` times on `saturation_protector` and return the largest +// headroom difference between two consecutive calls. +float RunOnConstantLevel(int num_iterations, + float speech_probability, + float peak_dbfs, + float speech_level_dbfs, + SaturationProtector& saturation_protector) { + float last_headroom = saturation_protector.HeadroomDb(); + float max_difference = 0.0f; + for (int i = 0; i < num_iterations; ++i) { + saturation_protector.Analyze(speech_probability, peak_dbfs, + speech_level_dbfs); + const float new_headroom = saturation_protector.HeadroomDb(); + max_difference = + std::max(max_difference, std::fabs(new_headroom - last_headroom)); + last_headroom = new_headroom; + } + return max_difference; +} + +// Checks that the returned headroom value is correctly reset. +TEST(GainController2SaturationProtector, Reset) { + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper); + const float initial_headroom_db = saturation_protector->HeadroomDb(); + RunOnConstantLevel(/*num_iterations=*/10, kMaxSpeechProbability, + /*peak_dbfs=*/0.0f, + /*speech_level_dbfs=*/-10.0f, *saturation_protector); + // Make sure that there are side-effects. + ASSERT_NE(initial_headroom_db, saturation_protector->HeadroomDb()); + saturation_protector->Reset(); + EXPECT_EQ(initial_headroom_db, saturation_protector->HeadroomDb()); +} + +// Checks that the estimate converges to the ratio between peaks and level +// estimator values after a while. +TEST(GainController2SaturationProtector, EstimatesCrestRatio) { + constexpr int kNumIterations = 2000; + constexpr float kPeakLevelDbfs = -20.0f; + constexpr float kCrestFactorDb = kInitialHeadroomDb + 1.0f; + constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - kCrestFactorDb; + const float kMaxDifferenceDb = + 0.5f * std::fabs(kInitialHeadroomDb - kCrestFactorDb); + + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper); + RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs, + kSpeechLevelDbfs, *saturation_protector); + EXPECT_NEAR(saturation_protector->HeadroomDb(), kCrestFactorDb, + kMaxDifferenceDb); +} + +// Checks that the headroom does not change too quickly. 
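+// With 10 ms frames (`kFrameDurationMs`), the 0.5 dB/s bound checked below
+// allows at most 0.005 dB of headroom change per `Analyze()` call.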
+TEST(GainController2SaturationProtector, ChangeSlowly) { + constexpr int kNumIterations = 1000; + constexpr float kPeakLevelDbfs = -20.f; + constexpr float kCrestFactorDb = kInitialHeadroomDb - 5.f; + constexpr float kOtherCrestFactorDb = kInitialHeadroomDb; + constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - kCrestFactorDb; + constexpr float kOtherSpeechLevelDbfs = kPeakLevelDbfs - kOtherCrestFactorDb; + + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper); + float max_difference_db = + RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs, + kSpeechLevelDbfs, *saturation_protector); + max_difference_db = std::max( + RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs, + kOtherSpeechLevelDbfs, *saturation_protector), + max_difference_db); + constexpr float kMaxChangeSpeedDbPerSecond = 0.5f; // 1 db / 2 seconds. + EXPECT_LE(max_difference_db, + kMaxChangeSpeedDbPerSecond / 1000 * kFrameDurationMs); +} + +class SaturationProtectorParametrization + : public ::testing::TestWithParam { + protected: + int adjacent_speech_frames_threshold() const { return GetParam(); } +}; + +TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) { + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper); + const float initial_headroom_db = saturation_protector->HeadroomDb(); + RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() - 1, + kMaxSpeechProbability, + /*peak_dbfs=*/0.0f, + /*speech_level_dbfs=*/-10.0f, *saturation_protector); + // No adaptation expected. + EXPECT_EQ(initial_headroom_db, saturation_protector->HeadroomDb()); +} + +TEST_P(SaturationProtectorParametrization, AdaptToEnoughSpeechSegments) { + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper); + const float initial_headroom_db = saturation_protector->HeadroomDb(); + RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() + 1, + kMaxSpeechProbability, + /*peak_dbfs=*/0.0f, + /*speech_level_dbfs=*/-10.0f, *saturation_protector); + // Adaptation expected. + EXPECT_NE(initial_headroom_db, saturation_protector->HeadroomDb()); +} + +INSTANTIATE_TEST_SUITE_P(GainController2, + SaturationProtectorParametrization, + ::testing::Values(2, 9, 17)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc new file mode 100644 index 0000000000..7bf3252116 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/speech_level_estimator.h" + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +float ClampLevelEstimateDbfs(float level_estimate_dbfs) { + return rtc::SafeClamp(level_estimate_dbfs, -90.0f, 30.0f); +} + +// Returns the initial speech level estimate needed to apply the initial gain. +float GetInitialSpeechLevelEstimateDbfs( + const AudioProcessing::Config::GainController2::AdaptiveDigital& config) { + return ClampLevelEstimateDbfs(-kSaturationProtectorInitialHeadroomDb - + config.initial_gain_db - config.headroom_db); +} + +} // namespace + +bool SpeechLevelEstimator::LevelEstimatorState::operator==( + const SpeechLevelEstimator::LevelEstimatorState& b) const { + return time_to_confidence_ms == b.time_to_confidence_ms && + level_dbfs.numerator == b.level_dbfs.numerator && + level_dbfs.denominator == b.level_dbfs.denominator; +} + +float SpeechLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const { + RTC_DCHECK_NE(denominator, 0.f); + return numerator / denominator; +} + +SpeechLevelEstimator::SpeechLevelEstimator( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int adjacent_speech_frames_threshold) + : apm_data_dumper_(apm_data_dumper), + initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)), + adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold), + level_dbfs_(initial_speech_level_dbfs_), + // TODO(bugs.webrtc.org/7494): Remove init below when AGC2 input volume + // controller temporal dependency removed. + is_confident_(false) { + RTC_DCHECK(apm_data_dumper_); + RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1); + Reset(); +} + +void SpeechLevelEstimator::Update(float rms_dbfs, + float peak_dbfs, + float speech_probability) { + RTC_DCHECK_GT(rms_dbfs, -150.0f); + RTC_DCHECK_LT(rms_dbfs, 50.0f); + RTC_DCHECK_GT(peak_dbfs, -150.0f); + RTC_DCHECK_LT(peak_dbfs, 50.0f); + RTC_DCHECK_GE(speech_probability, 0.0f); + RTC_DCHECK_LE(speech_probability, 1.0f); + if (speech_probability < kVadConfidenceThreshold) { + // Not a speech frame. + if (adjacent_speech_frames_threshold_ > 1) { + // When two or more adjacent speech frames are required in order to update + // the state, we need to decide whether to discard or confirm the updates + // based on the speech sequence length. + if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { + // First non-speech frame after a long enough sequence of speech frames. + // Update the reliable state. + reliable_state_ = preliminary_state_; + } else if (num_adjacent_speech_frames_ > 0) { + // First non-speech frame after a too short sequence of speech frames. + // Reset to the last reliable state. + preliminary_state_ = reliable_state_; + } + } + num_adjacent_speech_frames_ = 0; + } else { + // Speech frame observed. + num_adjacent_speech_frames_++; + + // Update preliminary level estimate. + RTC_DCHECK_GE(preliminary_state_.time_to_confidence_ms, 0); + const bool buffer_is_full = preliminary_state_.time_to_confidence_ms == 0; + if (!buffer_is_full) { + preliminary_state_.time_to_confidence_ms -= kFrameDurationMs; + } + // Weighted average of levels with speech probability as weight. + RTC_DCHECK_GT(speech_probability, 0.0f); + const float leak_factor = buffer_is_full ? 
kLevelEstimatorLeakFactor : 1.0f; + preliminary_state_.level_dbfs.numerator = + preliminary_state_.level_dbfs.numerator * leak_factor + + rms_dbfs * speech_probability; + preliminary_state_.level_dbfs.denominator = + preliminary_state_.level_dbfs.denominator * leak_factor + + speech_probability; + + const float level_dbfs = preliminary_state_.level_dbfs.GetRatio(); + + if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { + // `preliminary_state_` is now reliable. Update the last level estimation. + level_dbfs_ = ClampLevelEstimateDbfs(level_dbfs); + } + } + UpdateIsConfident(); + DumpDebugData(); +} + +void SpeechLevelEstimator::UpdateIsConfident() { + if (adjacent_speech_frames_threshold_ == 1) { + // Ignore `reliable_state_` when a single frame is enough to update the + // level estimate (because it is not used). + is_confident_ = preliminary_state_.time_to_confidence_ms == 0; + return; + } + // Once confident, it remains confident. + RTC_DCHECK(reliable_state_.time_to_confidence_ms != 0 || + preliminary_state_.time_to_confidence_ms == 0); + // During the first long enough speech sequence, `reliable_state_` must be + // ignored since `preliminary_state_` is used. + is_confident_ = + reliable_state_.time_to_confidence_ms == 0 || + (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ && + preliminary_state_.time_to_confidence_ms == 0); +} + +void SpeechLevelEstimator::Reset() { + ResetLevelEstimatorState(preliminary_state_); + ResetLevelEstimatorState(reliable_state_); + level_dbfs_ = initial_speech_level_dbfs_; + num_adjacent_speech_frames_ = 0; +} + +void SpeechLevelEstimator::ResetLevelEstimatorState( + LevelEstimatorState& state) const { + state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs; + state.level_dbfs.numerator = initial_speech_level_dbfs_; + state.level_dbfs.denominator = 1.0f; +} + +void SpeechLevelEstimator::DumpDebugData() const { + if (!apm_data_dumper_) + return; + apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", level_dbfs_); + apm_data_dumper_->DumpRaw("agc2_speech_level_is_confident", is_confident_); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_num_adjacent_speech_frames", + num_adjacent_speech_frames_); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_preliminary_level_estimate_num", + preliminary_state_.level_dbfs.numerator); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_preliminary_level_estimate_den", + preliminary_state_.level_dbfs.denominator); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_preliminary_time_to_confidence_ms", + preliminary_state_.time_to_confidence_ms); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_reliable_time_to_confidence_ms", + reliable_state_.time_to_confidence_ms); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.h b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.h new file mode 100644 index 0000000000..4d9f106ba9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
+
+#include <stddef.h>
+
+#include <type_traits>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+class ApmDataDumper;
+
+// Active speech level estimator based on the analysis of the following
+// framewise properties: RMS level (dBFS), peak level (dBFS), speech
+// probability.
+class SpeechLevelEstimator {
+ public:
+  SpeechLevelEstimator(
+      ApmDataDumper* apm_data_dumper,
+      const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+      int adjacent_speech_frames_threshold);
+  SpeechLevelEstimator(const SpeechLevelEstimator&) = delete;
+  SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete;
+
+  // Updates the level estimation.
+  void Update(float rms_dbfs, float peak_dbfs, float speech_probability);
+  // Returns the estimated speech plus noise level.
+  float level_dbfs() const { return level_dbfs_; }
+  // Returns true if the estimator is confident in its current estimate.
+  bool is_confident() const { return is_confident_; }
+
+  void Reset();
+
+ private:
+  // Part of the level estimator state used for check-pointing and restore ops.
+  struct LevelEstimatorState {
+    bool operator==(const LevelEstimatorState& s) const;
+    inline bool operator!=(const LevelEstimatorState& s) const {
+      return !(*this == s);
+    }
+    // TODO(bugs.webrtc.org/7494): Remove `time_to_confidence_ms` if redundant.
+    int time_to_confidence_ms;
+    struct Ratio {
+      float numerator;
+      float denominator;
+      float GetRatio() const;
+    } level_dbfs;
+  };
+  static_assert(std::is_trivially_copyable<LevelEstimatorState>::value, "");
+
+  void UpdateIsConfident();
+
+  void ResetLevelEstimatorState(LevelEstimatorState& state) const;
+
+  void DumpDebugData() const;
+
+  ApmDataDumper* const apm_data_dumper_;
+
+  const float initial_speech_level_dbfs_;
+  const int adjacent_speech_frames_threshold_;
+  LevelEstimatorState preliminary_state_;
+  LevelEstimatorState reliable_state_;
+  float level_dbfs_;
+  bool is_confident_;
+  int num_adjacent_speech_frames_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_gn/moz.build
new file mode 100644
index 0000000000..bb1dbc67b8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_gn/moz.build
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS 
+= [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("speech_level_estimator_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_unittest.cc new file mode 100644 index 0000000000..e1c5f85434 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_level_estimator_unittest.cc @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/speech_level_estimator.h" + +#include + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +using AdaptiveDigitalConfig = + AudioProcessing::Config::GainController2::AdaptiveDigital; + +// Number of speech frames that the level estimator must observe in order to +// become confident about the estimated level. 
+constexpr int kNumFramesToConfidence =
+    kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs;
+static_assert(kNumFramesToConfidence > 0, "");
+
+constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f;
+
+// Provides constant `rms_dbfs`, `peak_dbfs` and `speech_probability` values
+// `num_iterations` times to `level_estimator`.
+void RunOnConstantLevel(int num_iterations,
+                        float rms_dbfs,
+                        float peak_dbfs,
+                        float speech_probability,
+                        SpeechLevelEstimator& level_estimator) {
+  for (int i = 0; i < num_iterations; ++i) {
+    level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability);
+  }
+}
+
+constexpr float kNoSpeechProbability = 0.0f;
+constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f;
+constexpr float kMaxSpeechProbability = 1.0f;
+
+// Level estimator with data dumper.
+struct TestLevelEstimator {
+  explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
+      : data_dumper(0),
+        estimator(std::make_unique<SpeechLevelEstimator>(
+            &data_dumper,
+            AdaptiveDigitalConfig{},
+            adjacent_speech_frames_threshold)),
+        initial_speech_level_dbfs(estimator->level_dbfs()),
+        level_rms_dbfs(initial_speech_level_dbfs / 2.0f),
+        level_peak_dbfs(initial_speech_level_dbfs / 3.0f) {
+    RTC_DCHECK_LT(level_rms_dbfs, level_peak_dbfs);
+    RTC_DCHECK_LT(initial_speech_level_dbfs, level_rms_dbfs);
+    RTC_DCHECK_GT(level_rms_dbfs - initial_speech_level_dbfs, 5.0f)
+        << "Adjust `level_rms_dbfs` so that the difference from the initial "
+           "level is wide enough for the tests";
+  }
+  ApmDataDumper data_dumper;
+  std::unique_ptr<SpeechLevelEstimator> estimator;
+  const float initial_speech_level_dbfs;
+  const float level_rms_dbfs;
+  const float level_peak_dbfs;
+};
+
+// Checks that the level estimator converges to a constant input speech level.
+TEST(GainController2SpeechLevelEstimator, LevelStabilizes) {
+  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
+  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
+                     level_estimator.level_rms_dbfs,
+                     level_estimator.level_peak_dbfs, kMaxSpeechProbability,
+                     *level_estimator.estimator);
+  const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
+  RunOnConstantLevel(/*num_iterations=*/1, level_estimator.level_rms_dbfs,
+                     level_estimator.level_peak_dbfs, kMaxSpeechProbability,
+                     *level_estimator.estimator);
+  EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs,
+              0.1f);
+}
+
+// Checks that the level estimator does not become confident when too few
+// speech frames are observed.
+TEST(GainController2SpeechLevelEstimator, IsNotConfident) {
+  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
+  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
+                     level_estimator.level_rms_dbfs,
+                     level_estimator.level_peak_dbfs, kMaxSpeechProbability,
+                     *level_estimator.estimator);
+  EXPECT_FALSE(level_estimator.estimator->is_confident());
+}
+
+// Checks that the level estimator becomes confident when enough speech frames
+// are observed.
+TEST(GainController2SpeechLevelEstimator, IsConfident) {
+  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
+  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
+                     level_estimator.level_rms_dbfs,
+                     level_estimator.level_peak_dbfs, kMaxSpeechProbability,
+                     *level_estimator.estimator);
+  EXPECT_TRUE(level_estimator.estimator->is_confident());
+}
+
+// Checks that the estimated level is not affected by the level of non-speech
+// frames.
+TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + // Simulate speech. + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + const float estimated_level_dbfs = level_estimator.estimator->level_dbfs(); + // Simulate full-scale non-speech. + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + /*rms_dbfs=*/0.0f, /*peak_dbfs=*/0.0f, + kNoSpeechProbability, *level_estimator.estimator); + // No estimated level change is expected. + EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(), + estimated_level_dbfs); +} + +// Checks the convergence speed of the estimator before it becomes confident. +TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + EXPECT_NEAR(level_estimator.estimator->level_dbfs(), + level_estimator.level_rms_dbfs, + kConvergenceSpeedTestsLevelTolerance); +} + +// Checks the convergence speed of the estimator after it becomes confident. +TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + // Reach confidence using the initial level estimate. + RunOnConstantLevel( + /*num_iterations=*/kNumFramesToConfidence, + /*rms_dbfs=*/level_estimator.initial_speech_level_dbfs, + /*peak_dbfs=*/level_estimator.initial_speech_level_dbfs + 6.0f, + kMaxSpeechProbability, *level_estimator.estimator); + // No estimate change should occur, but confidence is achieved. + ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(), + level_estimator.initial_speech_level_dbfs); + ASSERT_TRUE(level_estimator.estimator->is_confident()); + // After confidence. + constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600; // 6 seconds. 
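+  // The post-confidence run must be longer than the run needed to reach
+  // confidence; the static_assert below enforces this.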
+ static_assert( + kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, ""); + RunOnConstantLevel( + /*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames, + level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, + kMaxSpeechProbability, *level_estimator.estimator); + EXPECT_NEAR(level_estimator.estimator->level_dbfs(), + level_estimator.level_rms_dbfs, + kConvergenceSpeedTestsLevelTolerance); +} + +class SpeechLevelEstimatorParametrization + : public ::testing::TestWithParam { + protected: + int adjacent_speech_frames_threshold() const { return GetParam(); } +}; + +TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) { + TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); + const float initial_level = level_estimator.estimator->level_dbfs(); + ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); + for (int i = 0; i < adjacent_speech_frames_threshold() - 1; ++i) { + SCOPED_TRACE(i); + level_estimator.estimator->Update(level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, + kMaxSpeechProbability); + EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs()); + } + level_estimator.estimator->Update(level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, + kLowSpeechProbability); + EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs()); +} + +TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) { + TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); + const float initial_level = level_estimator.estimator->level_dbfs(); + ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); + for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) { + level_estimator.estimator->Update(level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, + kMaxSpeechProbability); + } + EXPECT_LT(initial_level, level_estimator.estimator->level_dbfs()); +} + +INSTANTIATE_TEST_SUITE_P(GainController2, + SpeechLevelEstimatorParametrization, + ::testing::Values(1, 9, 17)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc new file mode 100644 index 0000000000..7746f6c000 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.cc @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/speech_probability_buffer.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr float kActivityThreshold = 0.9f; +constexpr int kNumAnalysisFrames = 100; +// We use 12 in AGC2 adaptive digital, but with a slightly different logic. 
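+// A run of at most `kTransientWidthThreshold` high probabilities that is
+// followed by a low one is treated as a transient and removed from the sum.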
+constexpr int kTransientWidthThreshold = 7; + +} // namespace + +SpeechProbabilityBuffer::SpeechProbabilityBuffer( + float low_probability_threshold) + : low_probability_threshold_(low_probability_threshold), + probabilities_(kNumAnalysisFrames) { + RTC_DCHECK_GE(low_probability_threshold, 0.0f); + RTC_DCHECK_LE(low_probability_threshold, 1.0f); + RTC_DCHECK(!probabilities_.empty()); +} + +void SpeechProbabilityBuffer::Update(float probability) { + // Remove the oldest entry if the circular buffer is full. + if (buffer_is_full_) { + const float oldest_probability = probabilities_[buffer_index_]; + sum_probabilities_ -= oldest_probability; + } + + // Check for transients. + if (probability <= low_probability_threshold_) { + // Set a probability lower than the threshold to zero. + probability = 0.0f; + + // Check if this has been a transient. + if (num_high_probability_observations_ <= kTransientWidthThreshold) { + RemoveTransient(); + } + num_high_probability_observations_ = 0; + } else if (num_high_probability_observations_ <= kTransientWidthThreshold) { + ++num_high_probability_observations_; + } + + // Update the circular buffer and the current sum. + probabilities_[buffer_index_] = probability; + sum_probabilities_ += probability; + + // Increment the buffer index and check for wrap-around. + if (++buffer_index_ >= kNumAnalysisFrames) { + buffer_index_ = 0; + buffer_is_full_ = true; + } +} + +void SpeechProbabilityBuffer::RemoveTransient() { + // Don't expect to be here if high-activity region is longer than + // `kTransientWidthThreshold` or there has not been any transient. + RTC_DCHECK_LE(num_high_probability_observations_, kTransientWidthThreshold); + + // Replace previously added probabilities with zero. + int index = + (buffer_index_ > 0) ? (buffer_index_ - 1) : (kNumAnalysisFrames - 1); + + while (num_high_probability_observations_-- > 0) { + sum_probabilities_ -= probabilities_[index]; + probabilities_[index] = 0.0f; + + // Update the circular buffer index. + index = (index > 0) ? (index - 1) : (kNumAnalysisFrames - 1); + } +} + +bool SpeechProbabilityBuffer::IsActiveSegment() const { + if (!buffer_is_full_) { + return false; + } + if (sum_probabilities_ < kActivityThreshold * kNumAnalysisFrames) { + return false; + } + return true; +} + +void SpeechProbabilityBuffer::Reset() { + sum_probabilities_ = 0.0f; + + // Empty the circular buffer. + buffer_index_ = 0; + buffer_is_full_ = false; + num_high_probability_observations_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.h new file mode 100644 index 0000000000..3056a3eeab --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_PROBABILITY_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_PROBABILITY_BUFFER_H_
+
+#include <vector>
+
+#include "rtc_base/gtest_prod_util.h"
+
+namespace webrtc {
+
+// This class implements a circular buffer that stores speech probabilities
+// for a speech segment and estimates speech activity for that segment.
+class SpeechProbabilityBuffer {
+ public:
+  // Ctor. The value of `low_probability_threshold` is required to be in the
+  // range [0.0f, 1.0f].
+  explicit SpeechProbabilityBuffer(float low_probability_threshold);
+  ~SpeechProbabilityBuffer() {}
+  SpeechProbabilityBuffer(const SpeechProbabilityBuffer&) = delete;
+  SpeechProbabilityBuffer& operator=(const SpeechProbabilityBuffer&) = delete;
+
+  // Adds `probability` to the buffer and computes an updated sum of the
+  // buffer probabilities. The value of `probability` is required to be in
+  // the range [0.0f, 1.0f].
+  void Update(float probability);
+
+  // Resets the buffer and forgets the past.
+  void Reset();
+
+  // Returns true if the segment is active (a long enough segment with an
+  // average speech probability above `low_probability_threshold`).
+  bool IsActiveSegment() const;
+
+ private:
+  void RemoveTransient();
+
+  // Use only for testing.
+  float GetSumProbabilities() const { return sum_probabilities_; }
+
+  FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest,
+                           CheckSumAfterInitialization);
+  FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest, CheckSumAfterUpdate);
+  FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest, CheckSumAfterReset);
+  FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest,
+                           CheckSumAfterTransientNotRemoved);
+  FRIEND_TEST_ALL_PREFIXES(SpeechProbabilityBufferTest,
+                           CheckSumAfterTransientRemoved);
+
+  const float low_probability_threshold_;
+
+  // Sum of the probabilities stored in `probabilities_`. Must be updated
+  // whenever `probabilities_` is updated.
+  float sum_probabilities_ = 0.0f;
+
+  // Circular buffer for probabilities.
+  std::vector<float> probabilities_;
+
+  // Current index of the circular buffer, where the newest data is written;
+  // when the buffer is full it therefore points at the oldest data.
+  int buffer_index_ = 0;
+
+  // Indicates whether the buffer is full, i.e. whether adding a new value
+  // removes the oldest one.
+  bool buffer_is_full_ = false;
+
+  int num_high_probability_observations_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_SPEECH_PROBABILITY_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer_unittest.cc
new file mode 100644
index 0000000000..89cc209d9d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/speech_probability_buffer_unittest.cc
@@ -0,0 +1,346 @@
+/*
+ *  Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/agc2/speech_probability_buffer.h" + +#include + +#include "test/gtest.h" + +namespace webrtc { +namespace { + +constexpr float kAbsError = 0.001f; +constexpr float kActivityThreshold = 0.9f; +constexpr float kLowProbabilityThreshold = 0.2f; +constexpr int kNumAnalysisFrames = 100; + +} // namespace + +TEST(SpeechProbabilityBufferTest, CheckSumAfterInitialization) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + EXPECT_EQ(buffer.GetSumProbabilities(), 0.0f); +} + +TEST(SpeechProbabilityBufferTest, CheckSumAfterUpdate) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(0.7f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 0.7f, kAbsError); + + buffer.Update(0.6f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 1.3f, kAbsError); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(1.0f); + } + + EXPECT_NEAR(buffer.GetSumProbabilities(), 99.6f, kAbsError); +} + +TEST(SpeechProbabilityBufferTest, CheckSumAfterReset) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(0.7f); + buffer.Update(0.6f); + buffer.Update(0.3f); + + EXPECT_GT(buffer.GetSumProbabilities(), 0.0f); + + buffer.Reset(); + + EXPECT_EQ(buffer.GetSumProbabilities(), 0.0f); +} + +TEST(SpeechProbabilityBufferTest, CheckSumAfterTransientNotRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + + buffer.Update(0.0f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 9.0f, kAbsError); + + buffer.Update(0.0f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 9.0f, kAbsError); +} + +TEST(SpeechProbabilityBufferTest, CheckSumAfterTransientRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(0.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + buffer.Update(1.0f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 3.0f, kAbsError); + + buffer.Update(0.0f); + + EXPECT_NEAR(buffer.GetSumProbabilities(), 0.0f, kAbsError); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsNotActiveAfterNoUpdates) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsActiveChangesFromFalseToTrue) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + // Add low probabilities until the buffer is full. That's not enough + // to make `IsActiveSegment()` to return true. + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(0.0f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + // Add high probabilities until `IsActiveSegment()` returns true. + for (int i = 0; i < kActivityThreshold * kNumAnalysisFrames - 1; ++i) { + buffer.Update(1.0f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsActiveChangesFromTrueToFalse) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + // Add high probabilities until the buffer is full. That's enough to + // make `IsActiveSegment()` to return true. 
+ for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(1.0f); + } + + EXPECT_TRUE(buffer.IsActiveSegment()); + + // Add low probabilities until `IsActiveSegment()` returns false. + for (int i = 0; i < (1.0f - kActivityThreshold) * kNumAnalysisFrames - 1; + ++i) { + buffer.Update(0.0f); + } + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsActiveAfterUpdatesWithHighProbabilities) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(1.0f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsNotActiveAfterUpdatesWithLowProbabilities) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(0.3f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.3f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsActiveAfterBufferIsFull) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(1.0f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsNotActiveAfterBufferIsFull) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames - 1; ++i) { + buffer.Update(0.29f); + } + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.29f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.29f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsNotActiveAfterReset) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(1.0f); + } + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Reset(); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsNotActiveAfterTransientRemovedAfterFewUpdates) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + buffer.Update(0.4f); + buffer.Update(0.4f); + buffer.Update(0.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsActiveAfterTransientNotRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(1.0f); + } + + buffer.Update(0.7f); + buffer.Update(0.8f); + buffer.Update(0.9f); + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(0.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(0.7f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, + CheckSegmentIsNotActiveAfterTransientNotRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(0.1f); + } + + buffer.Update(0.7f); + buffer.Update(0.8f); + buffer.Update(0.9f); + buffer.Update(1.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.7f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + 
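The buffer tests above all exercise two rules from `SpeechProbabilityBuffer::Update()`: once the buffer wraps around, the running sum drops the oldest entry, and `IsActiveSegment()` holds only when the buffer is full and the mean probability reaches `kActivityThreshold`. The sketch below distills that bookkeeping as a reading aid. It is illustrative only, not part of the patch: it omits the transient-removal logic, and the class and member names are invented rather than WebRTC APIs.

#include <vector>

// Minimal sketch of the circular-buffer averaging exercised by the tests
// above. Transient removal from the real class is intentionally omitted.
class ToyProbabilityBuffer {
 public:
  ToyProbabilityBuffer(int num_frames, float activity_threshold)
      : activity_threshold_(activity_threshold), probabilities_(num_frames) {}

  void Update(float probability) {
    if (buffer_is_full_) {
      sum_ -= probabilities_[index_];  // Drop the oldest entry from the sum.
    }
    probabilities_[index_] = probability;
    sum_ += probability;
    if (++index_ == static_cast<int>(probabilities_.size())) {
      index_ = 0;
      buffer_is_full_ = true;
    }
  }

  // Active once the buffer is full and the average probability reaches the
  // threshold, i.e. sum >= threshold * size.
  bool IsActiveSegment() const {
    return buffer_is_full_ &&
           sum_ >= activity_threshold_ * probabilities_.size();
  }

 private:
  const float activity_threshold_;
  std::vector<float> probabilities_;
  float sum_ = 0.0f;
  int index_ = 0;
  bool buffer_is_full_ = false;
};

With `num_frames = 100` and `activity_threshold = 0.9f`, this toy reproduces the flip points checked in `CheckSegmentIsActiveChangesFromFalseToTrue` above: 100 low frames leave the segment inactive, and exactly 90 full-probability frames are needed to activate it.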
+TEST(SpeechProbabilityBufferTest, + CheckSegmentIsNotActiveAfterTransientRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(0.1f); + } + + buffer.Update(0.7f); + buffer.Update(0.8f); + buffer.Update(0.9f); + buffer.Update(1.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.0f); + + EXPECT_FALSE(buffer.IsActiveSegment()); + + buffer.Update(0.7f); + + EXPECT_FALSE(buffer.IsActiveSegment()); +} + +TEST(SpeechProbabilityBufferTest, CheckSegmentIsActiveAfterTransientRemoved) { + SpeechProbabilityBuffer buffer(kLowProbabilityThreshold); + + for (int i = 0; i < kNumAnalysisFrames; ++i) { + buffer.Update(1.0f); + } + + buffer.Update(0.7f); + buffer.Update(0.8f); + buffer.Update(0.9f); + buffer.Update(1.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(0.0f); + + EXPECT_TRUE(buffer.IsActiveSegment()); + + buffer.Update(0.7f); + + EXPECT_TRUE(buffer.IsActiveSegment()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc new file mode 100644 index 0000000000..af6325dea7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/vad_wrapper.h" + +#include +#include + +#include "api/array_view.h" +#include "common_audio/resampler/include/push_resampler.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr int kNumFramesPerSecond = 100; + +class MonoVadImpl : public VoiceActivityDetectorWrapper::MonoVad { + public: + explicit MonoVadImpl(const AvailableCpuFeatures& cpu_features) + : features_extractor_(cpu_features), rnn_vad_(cpu_features) {} + MonoVadImpl(const MonoVadImpl&) = delete; + MonoVadImpl& operator=(const MonoVadImpl&) = delete; + ~MonoVadImpl() = default; + + int SampleRateHz() const override { return rnn_vad::kSampleRate24kHz; } + void Reset() override { rnn_vad_.Reset(); } + float Analyze(rtc::ArrayView frame) override { + RTC_DCHECK_EQ(frame.size(), rnn_vad::kFrameSize10ms24kHz); + std::array feature_vector; + const bool is_silence = features_extractor_.CheckSilenceComputeFeatures( + /*samples=*/{frame.data(), rnn_vad::kFrameSize10ms24kHz}, + feature_vector); + return rnn_vad_.ComputeVadProbability(feature_vector, is_silence); + } + + private: + rnn_vad::FeaturesExtractor features_extractor_; + rnn_vad::RnnVad rnn_vad_; +}; + +} // namespace + +VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper( + const AvailableCpuFeatures& cpu_features, + int sample_rate_hz) + : VoiceActivityDetectorWrapper(kVadResetPeriodMs, + cpu_features, + sample_rate_hz) {} + +VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper( + int vad_reset_period_ms, + const AvailableCpuFeatures& cpu_features, 
+    int sample_rate_hz)
+    : VoiceActivityDetectorWrapper(vad_reset_period_ms,
+                                   std::make_unique<MonoVadImpl>(cpu_features),
+                                   sample_rate_hz) {}
+
+VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
+    int vad_reset_period_ms,
+    std::unique_ptr<MonoVad> vad,
+    int sample_rate_hz)
+    : vad_reset_period_frames_(
+          rtc::CheckedDivExact(vad_reset_period_ms, kFrameDurationMs)),
+      time_to_vad_reset_(vad_reset_period_frames_),
+      vad_(std::move(vad)) {
+  RTC_DCHECK(vad_);
+  RTC_DCHECK_GT(vad_reset_period_frames_, 1);
+  resampled_buffer_.resize(
+      rtc::CheckedDivExact(vad_->SampleRateHz(), kNumFramesPerSecond));
+  Initialize(sample_rate_hz);
+}
+
+VoiceActivityDetectorWrapper::~VoiceActivityDetectorWrapper() = default;
+
+void VoiceActivityDetectorWrapper::Initialize(int sample_rate_hz) {
+  RTC_DCHECK_GT(sample_rate_hz, 0);
+  frame_size_ = rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond);
+  int status =
+      resampler_.InitializeIfNeeded(sample_rate_hz, vad_->SampleRateHz(),
+                                    /*num_channels=*/1);
+  constexpr int kStatusOk = 0;
+  RTC_DCHECK_EQ(status, kStatusOk);
+  vad_->Reset();
+}
+
+float VoiceActivityDetectorWrapper::Analyze(
+    AudioFrameView<const float> frame) {
+  // Periodically reset the VAD.
+  time_to_vad_reset_--;
+  if (time_to_vad_reset_ <= 0) {
+    vad_->Reset();
+    time_to_vad_reset_ = vad_reset_period_frames_;
+  }
+  // Resample the first channel of `frame`.
+  RTC_DCHECK_EQ(frame.samples_per_channel(), frame_size_);
+  resampler_.Resample(frame.channel(0).data(), frame_size_,
+                      resampled_buffer_.data(), resampled_buffer_.size());
+
+  return vad_->Analyze(resampled_buffer_);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h
new file mode 100644
index 0000000000..459c471630
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h
@@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "common_audio/resampler/include/push_resampler.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+
+// Wraps a single-channel Voice Activity Detector (VAD) which is used to
+// analyze the first channel of the input audio frames. Takes care of
+// resampling the input frames to match the sample rate of the wrapped VAD
+// and periodically resets the VAD.
+class VoiceActivityDetectorWrapper {
+ public:
+  // Single channel VAD interface.
+  class MonoVad {
+   public:
+    virtual ~MonoVad() = default;
+    // Returns the sample rate (Hz) required for the input frames analyzed by
+    // `Analyze`.
+    virtual int SampleRateHz() const = 0;
+    // Resets the internal state.
+    virtual void Reset() = 0;
+    // Analyzes an audio frame and returns the speech probability.
+    virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
+  };
+
+  // Ctor. Uses `cpu_features` to instantiate the default VAD.
+ VoiceActivityDetectorWrapper(const AvailableCpuFeatures& cpu_features, + int sample_rate_hz); + + // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call + // `MonoVad::Reset()`; it must be equal to or greater than the duration of two + // frames. Uses `cpu_features` to instantiate the default VAD. + VoiceActivityDetectorWrapper(int vad_reset_period_ms, + const AvailableCpuFeatures& cpu_features, + int sample_rate_hz); + // Ctor. Uses a custom `vad`. + VoiceActivityDetectorWrapper(int vad_reset_period_ms, + std::unique_ptr vad, + int sample_rate_hz); + + VoiceActivityDetectorWrapper(const VoiceActivityDetectorWrapper&) = delete; + VoiceActivityDetectorWrapper& operator=(const VoiceActivityDetectorWrapper&) = + delete; + ~VoiceActivityDetectorWrapper(); + + // Initializes the VAD wrapper. + void Initialize(int sample_rate_hz); + + // Analyzes the first channel of `frame` and returns the speech probability. + // `frame` must be a 10 ms frame with the sample rate specified in the last + // `Initialize()` call. + float Analyze(AudioFrameView frame); + + private: + const int vad_reset_period_frames_; + int frame_size_; + int time_to_vad_reset_; + PushResampler resampler_; + std::unique_ptr vad_; + std::vector resampled_buffer_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build new file mode 100644 index 0000000000..dfa2765108 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build @@ -0,0 +1,232 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + 
"winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("vad_wrapper_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc new file mode 100644 index 0000000000..91efdb566e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/vad_wrapper.h"
+
+#include <limits>
+#include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/gunit.h"
+#include "rtc_base/numerics/safe_compare.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::AnyNumber;
+using ::testing::Return;
+using ::testing::ReturnRoundRobin;
+using ::testing::Truly;
+
+constexpr int kNumFramesPerSecond = 100;
+
+constexpr int kNoVadPeriodicReset =
+    kFrameDurationMs * (std::numeric_limits<int>::max() / kFrameDurationMs);
+
+constexpr int kSampleRate8kHz = 8000;
+
+class MockVad : public VoiceActivityDetectorWrapper::MonoVad {
+ public:
+  MOCK_METHOD(int, SampleRateHz, (), (const, override));
+  MOCK_METHOD(void, Reset, (), (override));
+  MOCK_METHOD(float, Analyze, (rtc::ArrayView<const float> frame), (override));
+};
+
+// Checks that the ctor and `Initialize()` read the sample rate of the wrapped
+// VAD.
+TEST(GainController2VoiceActivityDetectorWrapper, CtorAndInitReadSampleRate) {
+  auto vad = std::make_unique<MockVad>();
+  EXPECT_CALL(*vad, SampleRateHz)
+      .Times(2)
+      .WillRepeatedly(Return(kSampleRate8kHz));
+  EXPECT_CALL(*vad, Reset).Times(AnyNumber());
+  auto vad_wrapper = std::make_unique<VoiceActivityDetectorWrapper>(
+      kNoVadPeriodicReset, std::move(vad), kSampleRate8kHz);
+}
+
+// Creates a `VoiceActivityDetectorWrapper` injecting a mock VAD that
+// repeatedly returns the next value from `speech_probabilities` and that
+// restarts from the beginning after the last element is returned.
+std::unique_ptr<VoiceActivityDetectorWrapper> CreateMockVadWrapper(
+    int vad_reset_period_ms,
+    int sample_rate_hz,
+    const std::vector<float>& speech_probabilities,
+    int expected_vad_reset_calls) {
+  auto vad = std::make_unique<MockVad>();
+  EXPECT_CALL(*vad, SampleRateHz)
+      .Times(AnyNumber())
+      .WillRepeatedly(Return(sample_rate_hz));
+  if (expected_vad_reset_calls >= 0) {
+    EXPECT_CALL(*vad, Reset).Times(expected_vad_reset_calls);
+  }
+  EXPECT_CALL(*vad, Analyze)
+      .Times(AnyNumber())
+      .WillRepeatedly(ReturnRoundRobin(speech_probabilities));
+  return std::make_unique<VoiceActivityDetectorWrapper>(
+      vad_reset_period_ms, std::move(vad), kSampleRate8kHz);
+}
+
+// 10 ms mono frame.
+struct FrameWithView {
+  // Ctor. Initializes the frame samples to zero.
+  explicit FrameWithView(int sample_rate_hz)
+      : samples(rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond),
+                0.0f),
+        channel0(samples.data()),
+        view(&channel0, /*num_channels=*/1, samples.size()) {}
+  std::vector<float> samples;
+  const float* const channel0;
+  const AudioFrameView<const float> view;
+};
+
+// Checks that the expected speech probabilities are returned.
+TEST(GainController2VoiceActivityDetectorWrapper, CheckSpeechProbabilities) {
+  const std::vector<float> speech_probabilities{0.709f, 0.484f, 0.882f, 0.167f,
+                                                0.44f,  0.525f, 0.858f, 0.314f,
+                                                0.653f, 0.965f, 0.413f, 0.0f};
+  auto vad_wrapper = CreateMockVadWrapper(kNoVadPeriodicReset, kSampleRate8kHz,
+                                          speech_probabilities,
+                                          /*expected_vad_reset_calls=*/1);
+  FrameWithView frame(kSampleRate8kHz);
+  for (int i = 0; rtc::SafeLt(i, speech_probabilities.size()); ++i) {
+    SCOPED_TRACE(i);
+    EXPECT_EQ(speech_probabilities[i], vad_wrapper->Analyze(frame.view));
+  }
+}
+
+// Checks that the VAD is not periodically reset.
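+// Note: `kNoVadPeriodicReset` is the largest multiple of `kFrameDurationMs`
+// that fits in an int, so the periodic-reset deadline is never reached; the
+// single expected `Reset()` call is the one issued at initialization (cf. the
+// `1 +` term in the periodic-reset expectation below).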
+TEST(GainController2VoiceActivityDetectorWrapper, VadNoPeriodicReset) {
+  constexpr int kNumFrames = 19;
+  auto vad_wrapper = CreateMockVadWrapper(kNoVadPeriodicReset, kSampleRate8kHz,
+                                          /*speech_probabilities=*/{1.0f},
+                                          /*expected_vad_reset_calls=*/1);
+  FrameWithView frame(kSampleRate8kHz);
+  for (int i = 0; i < kNumFrames; ++i) {
+    vad_wrapper->Analyze(frame.view);
+  }
+}
+
+class VadPeriodResetParametrization
+    : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+  int num_frames() const { return std::get<0>(GetParam()); }
+  int vad_reset_period_frames() const { return std::get<1>(GetParam()); }
+};
+
+// Checks that the VAD is periodically reset with the expected period.
+TEST_P(VadPeriodResetParametrization, VadPeriodicReset) {
+  auto vad_wrapper = CreateMockVadWrapper(
+      /*vad_reset_period_ms=*/vad_reset_period_frames() * kFrameDurationMs,
+      kSampleRate8kHz,
+      /*speech_probabilities=*/{1.0f},
+      /*expected_vad_reset_calls=*/1 +
+          num_frames() / vad_reset_period_frames());
+  FrameWithView frame(kSampleRate8kHz);
+  for (int i = 0; i < num_frames(); ++i) {
+    vad_wrapper->Analyze(frame.view);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(GainController2VoiceActivityDetectorWrapper,
+                         VadPeriodResetParametrization,
+                         ::testing::Combine(::testing::Values(1, 19, 123),
+                                            ::testing::Values(2, 5, 20, 53)));
+
+class VadResamplingParametrization
+    : public ::testing::TestWithParam<std::tuple<int, int>> {
+ protected:
+  int input_sample_rate_hz() const { return std::get<0>(GetParam()); }
+  int vad_sample_rate_hz() const { return std::get<1>(GetParam()); }
+};
+
+// Checks that, regardless of the input audio sample rate, the wrapped VAD
+// analyzes frames of the expected size, that is, sized according to its
+// internal sample rate.
+TEST_P(VadResamplingParametrization, CheckResampledFrameSize) {
+  auto vad = std::make_unique<MockVad>();
+  EXPECT_CALL(*vad, SampleRateHz)
+      .Times(AnyNumber())
+      .WillRepeatedly(Return(vad_sample_rate_hz()));
+  EXPECT_CALL(*vad, Reset).Times(1);
+  EXPECT_CALL(*vad, Analyze(Truly([this](rtc::ArrayView<const float> frame) {
+    return rtc::SafeEq(frame.size(), rtc::CheckedDivExact(vad_sample_rate_hz(),
+                                                          kNumFramesPerSecond));
+  }))).Times(1);
+  auto vad_wrapper = std::make_unique<VoiceActivityDetectorWrapper>(
+      kNoVadPeriodicReset, std::move(vad), input_sample_rate_hz());
+  FrameWithView frame(input_sample_rate_hz());
+  vad_wrapper->Analyze(frame.view);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    GainController2VoiceActivityDetectorWrapper,
+    VadResamplingParametrization,
+    ::testing::Combine(::testing::Values(8000, 16000, 44100, 48000),
+                       ::testing::Values(6000, 8000, 12000, 16000, 24000)));
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc
new file mode 100644
index 0000000000..a70d815196
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc
@@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+
+namespace webrtc {
+
+namespace {
+
+std::vector<float*> ConstructChannelPointers(
+    std::vector<std::vector<float>>* x) {
+  std::vector<float*> channel_ptrs;
+  for (auto& v : *x) {
+    channel_ptrs.push_back(v.data());
+  }
+  return channel_ptrs;
+}
+}  // namespace
+
+VectorFloatFrame::VectorFloatFrame(int num_channels,
+                                   int samples_per_channel,
+                                   float start_value)
+    : channels_(num_channels,
+                std::vector<float>(samples_per_channel, start_value)),
+      channel_ptrs_(ConstructChannelPointers(&channels_)),
+      float_frame_view_(channel_ptrs_.data(),
+                        channels_.size(),
+                        samples_per_channel) {}
+
+VectorFloatFrame::~VectorFloatFrame() = default;
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h
new file mode 100644
index 0000000000..b521f346f9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_VECTOR_FLOAT_FRAME_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_VECTOR_FLOAT_FRAME_H_
+
+#include <vector>
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+
+// A construct consisting of a multi-channel audio frame, and a FloatFrame view
+// of it.
+class VectorFloatFrame {
+ public:
+  VectorFloatFrame(int num_channels,
+                   int samples_per_channel,
+                   float start_value);
+  const AudioFrameView<float>& float_frame_view() { return float_frame_view_; }
+  AudioFrameView<const float> float_frame_view() const {
+    return float_frame_view_;
+  }
+
+  ~VectorFloatFrame();
+
+ private:
+  std::vector<std::vector<float>> channels_;
+  std::vector<float*> channel_ptrs_;
+  AudioFrameView<float> float_frame_view_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_VECTOR_FLOAT_FRAME_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/api_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/api_gn/moz.build
new file mode 100644
index 0000000000..7f1c48ac39
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/api_gn/moz.build
@@ -0,0 +1,225 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("api_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/apm_logging_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/apm_logging_gn/moz.build new file mode 100644 index 0000000000..b6d96414b4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/apm_logging_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ 
+ "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("apm_logging_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_buffer.cc b/third_party/libwebrtc/modules/audio_processing/audio_buffer.cc new file mode 100644 index 0000000000..3dbe1fe072 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_buffer.cc @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/audio_buffer.h"
+
+#include <string.h>
+
+#include <cstdint>
+
+#include "common_audio/channel_buffer.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/resampler/push_sinc_resampler.h"
+#include "modules/audio_processing/splitting_filter.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr size_t kSamplesPer32kHzChannel = 320;
+constexpr size_t kSamplesPer48kHzChannel = 480;
+constexpr size_t kMaxSamplesPerChannel = AudioBuffer::kMaxSampleRate / 100;
+
+size_t NumBandsFromFramesPerChannel(size_t num_frames) {
+  if (num_frames == kSamplesPer32kHzChannel) {
+    return 2;
+  }
+  if (num_frames == kSamplesPer48kHzChannel) {
+    return 3;
+  }
+  return 1;
+}
+
+}  // namespace
+
+AudioBuffer::AudioBuffer(size_t input_rate,
+                         size_t input_num_channels,
+                         size_t buffer_rate,
+                         size_t buffer_num_channels,
+                         size_t output_rate,
+                         size_t output_num_channels)
+    : input_num_frames_(static_cast<size_t>(input_rate) / 100),
+      input_num_channels_(input_num_channels),
+      buffer_num_frames_(static_cast<size_t>(buffer_rate) / 100),
+      buffer_num_channels_(buffer_num_channels),
+      output_num_frames_(static_cast<size_t>(output_rate) / 100),
+      output_num_channels_(0),
+      num_channels_(buffer_num_channels),
+      num_bands_(NumBandsFromFramesPerChannel(buffer_num_frames_)),
+      num_split_frames_(rtc::CheckedDivExact(buffer_num_frames_, num_bands_)),
+      data_(
+          new ChannelBuffer<float>(buffer_num_frames_, buffer_num_channels_)) {
+  RTC_DCHECK_GT(input_num_frames_, 0);
+  RTC_DCHECK_GT(buffer_num_frames_, 0);
+  RTC_DCHECK_GT(output_num_frames_, 0);
+  RTC_DCHECK_GT(input_num_channels_, 0);
+  RTC_DCHECK_GT(buffer_num_channels_, 0);
+  RTC_DCHECK_LE(buffer_num_channels_, input_num_channels_);
+
+  const bool input_resampling_needed = input_num_frames_ != buffer_num_frames_;
+  const bool output_resampling_needed =
+      output_num_frames_ != buffer_num_frames_;
+  if (input_resampling_needed) {
+    for (size_t i = 0; i < buffer_num_channels_; ++i) {
+      input_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
+          new PushSincResampler(input_num_frames_, buffer_num_frames_)));
+    }
+  }
+
+  if (output_resampling_needed) {
+    for (size_t i = 0; i < buffer_num_channels_; ++i) {
+      output_resamplers_.push_back(std::unique_ptr<PushSincResampler>(
+          new PushSincResampler(buffer_num_frames_, output_num_frames_)));
+    }
+  }
+
+  if (num_bands_ > 1) {
+    split_data_.reset(new ChannelBuffer<float>(
+        buffer_num_frames_, buffer_num_channels_, num_bands_));
+    splitting_filter_.reset(new SplittingFilter(
+        buffer_num_channels_, num_bands_, buffer_num_frames_));
+  }
+}
+
+AudioBuffer::~AudioBuffer() {}
+
+void AudioBuffer::set_downmixing_to_specific_channel(size_t channel) {
+  downmix_by_averaging_ = false;
+  RTC_DCHECK_GT(input_num_channels_, channel);
+  channel_for_downmixing_ = std::min(channel, input_num_channels_ - 1);
+}
+
+void AudioBuffer::set_downmixing_by_averaging() {
+  downmix_by_averaging_ = true;
+}
+
+void AudioBuffer::CopyFrom(const float* const* stacked_data,
+                           const StreamConfig& stream_config) {
+  RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_);
+  RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_);
+  RestoreNumChannels();
+  const bool downmix_needed = input_num_channels_ > 1 && num_channels_ == 1;
+
+  const bool resampling_needed = input_num_frames_ != buffer_num_frames_;
+
+  if (downmix_needed) {
+    RTC_DCHECK_GE(kMaxSamplesPerChannel, input_num_frames_);
+
+    std::array<float, kMaxSamplesPerChannel> downmix;
+    if (downmix_by_averaging_) {
+      const float kOneByNumChannels = 1.f / input_num_channels_;
+      for (size_t i = 0; i < input_num_frames_; ++i) {
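+        // Sum sample i across all input channels; the scale by
+        // `kOneByNumChannels` is applied once per sample instead of dividing
+        // inside the inner accumulation loop.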
+ float value = stacked_data[0][i]; + for (size_t j = 1; j < input_num_channels_; ++j) { + value += stacked_data[j][i]; + } + downmix[i] = value * kOneByNumChannels; + } + } + const float* downmixed_data = downmix_by_averaging_ + ? downmix.data() + : stacked_data[channel_for_downmixing_]; + + if (resampling_needed) { + input_resamplers_[0]->Resample(downmixed_data, input_num_frames_, + data_->channels()[0], buffer_num_frames_); + } + const float* data_to_convert = + resampling_needed ? data_->channels()[0] : downmixed_data; + FloatToFloatS16(data_to_convert, buffer_num_frames_, data_->channels()[0]); + } else { + if (resampling_needed) { + for (size_t i = 0; i < num_channels_; ++i) { + input_resamplers_[i]->Resample(stacked_data[i], input_num_frames_, + data_->channels()[i], + buffer_num_frames_); + FloatToFloatS16(data_->channels()[i], buffer_num_frames_, + data_->channels()[i]); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + FloatToFloatS16(stacked_data[i], buffer_num_frames_, + data_->channels()[i]); + } + } + } +} + +void AudioBuffer::CopyTo(const StreamConfig& stream_config, + float* const* stacked_data) { + RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_); + + const bool resampling_needed = output_num_frames_ != buffer_num_frames_; + if (resampling_needed) { + for (size_t i = 0; i < num_channels_; ++i) { + FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, + data_->channels()[i]); + output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_, + stacked_data[i], output_num_frames_); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, + stacked_data[i]); + } + } + + for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) { + memcpy(stacked_data[i], stacked_data[0], + output_num_frames_ * sizeof(**stacked_data)); + } +} + +void AudioBuffer::CopyTo(AudioBuffer* buffer) const { + RTC_DCHECK_EQ(buffer->num_frames(), output_num_frames_); + + const bool resampling_needed = output_num_frames_ != buffer_num_frames_; + if (resampling_needed) { + for (size_t i = 0; i < num_channels_; ++i) { + output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_, + buffer->channels()[i], + buffer->num_frames()); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + memcpy(buffer->channels()[i], data_->channels()[i], + buffer_num_frames_ * sizeof(**buffer->channels())); + } + } + + for (size_t i = num_channels_; i < buffer->num_channels(); ++i) { + memcpy(buffer->channels()[i], buffer->channels()[0], + output_num_frames_ * sizeof(**buffer->channels())); + } +} + +void AudioBuffer::RestoreNumChannels() { + num_channels_ = buffer_num_channels_; + data_->set_num_channels(buffer_num_channels_); + if (split_data_.get()) { + split_data_->set_num_channels(buffer_num_channels_); + } +} + +void AudioBuffer::set_num_channels(size_t num_channels) { + RTC_DCHECK_GE(buffer_num_channels_, num_channels); + num_channels_ = num_channels; + data_->set_num_channels(num_channels); + if (split_data_.get()) { + split_data_->set_num_channels(num_channels); + } +} + +// The resampler is only for supporting 48kHz to 16kHz in the reverse stream. 
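+// Example: a stereo int16 frame is interleaved as L0 R0 L1 R1 ..., so channel
+// `c` is read at indices c, c + num_channels, c + 2 * num_channels, ...; this
+// is what the `deinterleave_channel` lambda below does.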
+void AudioBuffer::CopyFrom(const int16_t* const interleaved_data,
+                           const StreamConfig& stream_config) {
+  RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_);
+  RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_);
+  RestoreNumChannels();
+
+  const bool resampling_required = input_num_frames_ != buffer_num_frames_;
+
+  const int16_t* interleaved = interleaved_data;
+  if (num_channels_ == 1) {
+    if (input_num_channels_ == 1) {
+      if (resampling_required) {
+        std::array<float, kMaxSamplesPerChannel> float_buffer;
+        S16ToFloatS16(interleaved, input_num_frames_, float_buffer.data());
+        input_resamplers_[0]->Resample(float_buffer.data(), input_num_frames_,
+                                       data_->channels()[0],
+                                       buffer_num_frames_);
+      } else {
+        S16ToFloatS16(interleaved, input_num_frames_, data_->channels()[0]);
+      }
+    } else {
+      std::array<float, kMaxSamplesPerChannel> float_buffer;
+      float* downmixed_data =
+          resampling_required ? float_buffer.data() : data_->channels()[0];
+      if (downmix_by_averaging_) {
+        for (size_t j = 0, k = 0; j < input_num_frames_; ++j) {
+          int32_t sum = 0;
+          for (size_t i = 0; i < input_num_channels_; ++i, ++k) {
+            sum += interleaved[k];
+          }
+          downmixed_data[j] = sum / static_cast<float>(input_num_channels_);
+        }
+      } else {
+        for (size_t j = 0, k = channel_for_downmixing_; j < input_num_frames_;
+             ++j, k += input_num_channels_) {
+          downmixed_data[j] = interleaved[k];
+        }
+      }
+
+      if (resampling_required) {
+        input_resamplers_[0]->Resample(downmixed_data, input_num_frames_,
+                                       data_->channels()[0],
+                                       buffer_num_frames_);
+      }
+    }
+  } else {
+    auto deinterleave_channel = [](size_t channel, size_t num_channels,
+                                   size_t samples_per_channel, const int16_t* x,
+                                   float* y) {
+      for (size_t j = 0, k = channel; j < samples_per_channel;
+           ++j, k += num_channels) {
+        y[j] = x[k];
+      }
+    };
+
+    if (resampling_required) {
+      std::array<float, kMaxSamplesPerChannel> float_buffer;
+      for (size_t i = 0; i < num_channels_; ++i) {
+        deinterleave_channel(i, num_channels_, input_num_frames_, interleaved,
+                             float_buffer.data());
+        input_resamplers_[i]->Resample(float_buffer.data(), input_num_frames_,
+                                       data_->channels()[i],
+                                       buffer_num_frames_);
+      }
+    } else {
+      for (size_t i = 0; i < num_channels_; ++i) {
+        deinterleave_channel(i, num_channels_, input_num_frames_, interleaved,
+                             data_->channels()[i]);
+      }
+    }
+  }
+}
+
+void AudioBuffer::CopyTo(const StreamConfig& stream_config,
+                         int16_t* const interleaved_data) {
+  const size_t config_num_channels = stream_config.num_channels();
+
+  RTC_DCHECK(config_num_channels == num_channels_ || num_channels_ == 1);
+  RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_);
+
+  const bool resampling_required = buffer_num_frames_ != output_num_frames_;
+
+  int16_t* interleaved = interleaved_data;
+  if (num_channels_ == 1) {
+    std::array<float, kMaxSamplesPerChannel> float_buffer;
+
+    if (resampling_required) {
+      output_resamplers_[0]->Resample(data_->channels()[0], buffer_num_frames_,
+                                      float_buffer.data(), output_num_frames_);
+    }
+    const float* deinterleaved =
+        resampling_required ? float_buffer.data() : data_->channels()[0];
+
+    if (config_num_channels == 1) {
+      for (size_t j = 0; j < output_num_frames_; ++j) {
+        interleaved[j] = FloatS16ToS16(deinterleaved[j]);
+      }
+    } else {
+      for (size_t i = 0, k = 0; i < output_num_frames_; ++i) {
+        float tmp = FloatS16ToS16(deinterleaved[i]);
+        for (size_t j = 0; j < config_num_channels; ++j, ++k) {
+          interleaved[k] = tmp;
+        }
+      }
+    }
+  } else {
+    auto interleave_channel = [](size_t channel, size_t num_channels,
+                                 size_t samples_per_channel, const float* x,
+                                 int16_t* y) {
+      for (size_t k = 0, j = channel; k < samples_per_channel;
+           ++k, j += num_channels) {
+        y[j] = FloatS16ToS16(x[k]);
+      }
+    };
+
+    if (resampling_required) {
+      for (size_t i = 0; i < num_channels_; ++i) {
+        std::array<float, kMaxSamplesPerChannel> float_buffer;
+        output_resamplers_[i]->Resample(data_->channels()[i],
+                                        buffer_num_frames_, float_buffer.data(),
+                                        output_num_frames_);
+        interleave_channel(i, config_num_channels, output_num_frames_,
+                           float_buffer.data(), interleaved);
+      }
+    } else {
+      for (size_t i = 0; i < num_channels_; ++i) {
+        interleave_channel(i, config_num_channels, output_num_frames_,
+                           data_->channels()[i], interleaved);
+      }
+    }
+
+    // Duplicate the last real channel into any extra output channels.
+    for (size_t i = num_channels_; i < config_num_channels; ++i) {
+      for (size_t j = 0, k = i, n = num_channels_ - 1; j < output_num_frames_;
+           ++j, k += config_num_channels, n += config_num_channels) {
+        interleaved[k] = interleaved[n];
+      }
+    }
+  }
+}
+
+void AudioBuffer::SplitIntoFrequencyBands() {
+  splitting_filter_->Analysis(data_.get(), split_data_.get());
+}
+
+void AudioBuffer::MergeFrequencyBands() {
+  splitting_filter_->Synthesis(split_data_.get(), data_.get());
+}
+
+void AudioBuffer::ExportSplitChannelData(
+    size_t channel,
+    int16_t* const* split_band_data) const {
+  for (size_t k = 0; k < num_bands(); ++k) {
+    const float* band_data = split_bands_const(channel)[k];
+
+    RTC_DCHECK(split_band_data[k]);
+    RTC_DCHECK(band_data);
+    for (size_t i = 0; i < num_frames_per_band(); ++i) {
+      split_band_data[k][i] = FloatS16ToS16(band_data[i]);
+    }
+  }
+}
+
+void AudioBuffer::ImportSplitChannelData(
+    size_t channel,
+    const int16_t* const* split_band_data) {
+  for (size_t k = 0; k < num_bands(); ++k) {
+    float* band_data = split_bands(channel)[k];
+    RTC_DCHECK(split_band_data[k]);
+    RTC_DCHECK(band_data);
+    for (size_t i = 0; i < num_frames_per_band(); ++i) {
+      band_data[i] = split_band_data[k][i];
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_buffer.h b/third_party/libwebrtc/modules/audio_processing/audio_buffer.h
new file mode 100644
index 0000000000..b9ea3000a2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_buffer.h
@@ -0,0 +1,172 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "common_audio/channel_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+class PushSincResampler;
+class SplittingFilter;
+
+enum Band { kBand0To8kHz = 0, kBand8To16kHz = 1, kBand16To24kHz = 2 };
+
+// Stores any audio data in a way that allows the audio processing module to
+// operate on it in a controlled manner.
+class AudioBuffer {
+ public:
+  static const int kSplitBandSize = 160;
+  static const int kMaxSampleRate = 384000;
+  AudioBuffer(size_t input_rate,
+              size_t input_num_channels,
+              size_t buffer_rate,
+              size_t buffer_num_channels,
+              size_t output_rate,
+              size_t output_num_channels);
+
+  virtual ~AudioBuffer();
+
+  AudioBuffer(const AudioBuffer&) = delete;
+  AudioBuffer& operator=(const AudioBuffer&) = delete;
+
+  // Specify that downmixing should be done by selecting a single channel.
+  void set_downmixing_to_specific_channel(size_t channel);
+
+  // Specify that downmixing should be done by averaging all channels.
+  void set_downmixing_by_averaging();
+
+  // Set the number of channels in the buffer. The specified number of channels
+  // cannot be larger than the specified buffer_num_channels. The number is
+  // also reset at each call to CopyFrom or InterleaveFrom.
+  void set_num_channels(size_t num_channels);
+
+  size_t num_channels() const { return num_channels_; }
+  size_t num_frames() const { return buffer_num_frames_; }
+  size_t num_frames_per_band() const { return num_split_frames_; }
+  size_t num_bands() const { return num_bands_; }
+
+  // Returns pointer arrays to the full-band channels.
+  // Usage:
+  //   channels()[channel][sample].
+  // Where:
+  //   0 <= channel < `buffer_num_channels_`
+  //   0 <= sample < `buffer_num_frames_`
+  float* const* channels() { return data_->channels(); }
+  const float* const* channels_const() const { return data_->channels(); }
+
+  // Returns pointer arrays to the bands for a specific channel.
+  // Usage:
+  //   split_bands(channel)[band][sample].
+  // Where:
+  //   0 <= channel < `buffer_num_channels_`
+  //   0 <= band < `num_bands_`
+  //   0 <= sample < `num_split_frames_`
+  const float* const* split_bands_const(size_t channel) const {
+    return split_data_.get() ? split_data_->bands(channel)
+                             : data_->bands(channel);
+  }
+  float* const* split_bands(size_t channel) {
+    return split_data_.get() ? split_data_->bands(channel)
+                             : data_->bands(channel);
+  }
+
+  // Returns a pointer array to the channels for a specific band.
+  // Usage:
+  //   split_channels(band)[channel][sample].
+  // Where:
+  //   0 <= band < `num_bands_`
+  //   0 <= channel < `buffer_num_channels_`
+  //   0 <= sample < `num_split_frames_`
+  const float* const* split_channels_const(Band band) const {
+    if (split_data_.get()) {
+      return split_data_->channels(band);
+    } else {
+      return band == kBand0To8kHz ? data_->channels() : nullptr;
+    }
+  }
+
+  // Copies data into the buffer.
+  void CopyFrom(const int16_t* const interleaved_data,
+                const StreamConfig& stream_config);
+  void CopyFrom(const float* const* stacked_data,
+                const StreamConfig& stream_config);
+
+  // Copies data from the buffer.
+  void CopyTo(const StreamConfig& stream_config,
+              int16_t* const interleaved_data);
+  void CopyTo(const StreamConfig& stream_config, float* const* stacked_data);
+  void CopyTo(AudioBuffer* buffer) const;
+
+  // Splits the buffer data into frequency bands.
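+  // Usage sketch (illustrative): at 48 kHz the signal is split into three
+  // bands of kSplitBandSize (160) samples per 10 ms frame:
+  //   AudioBuffer buffer(48000, 1, 48000, 1, 48000, 1);
+  //   buffer.SplitIntoFrequencyBands();
+  //   const float* low = buffer.split_bands_const(/*channel=*/0)[kBand0To8kHz];
+  //   buffer.MergeFrequencyBands();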
+  void SplitIntoFrequencyBands();
+
+  // Recombines the frequency bands into a full-band signal.
+  void MergeFrequencyBands();
+
+  // Copies the split bands data into the integer two-dimensional array.
+  void ExportSplitChannelData(size_t channel,
+                              int16_t* const* split_band_data) const;
+
+  // Copies the data in the integer two-dimensional array into the split_bands
+  // data.
+  void ImportSplitChannelData(size_t channel,
+                              const int16_t* const* split_band_data);
+
+  static const size_t kMaxSplitFrameLength = 160;
+  static const size_t kMaxNumBands = 3;
+
+  // Deprecated methods, will be removed soon.
+  float* const* channels_f() { return channels(); }
+  const float* const* channels_const_f() const { return channels_const(); }
+  const float* const* split_bands_const_f(size_t channel) const {
+    return split_bands_const(channel);
+  }
+  float* const* split_bands_f(size_t channel) { return split_bands(channel); }
+  const float* const* split_channels_const_f(Band band) const {
+    return split_channels_const(band);
+  }
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(AudioBufferTest,
+                           SetNumChannelsSetsChannelBuffersNumChannels);
+  void RestoreNumChannels();
+
+  const size_t input_num_frames_;
+  const size_t input_num_channels_;
+  const size_t buffer_num_frames_;
+  const size_t buffer_num_channels_;
+  const size_t output_num_frames_;
+  const size_t output_num_channels_;
+
+  size_t num_channels_;
+  size_t num_bands_;
+  size_t num_split_frames_;
+
+  std::unique_ptr<ChannelBuffer<float>> data_;
+  std::unique_ptr<ChannelBuffer<float>> split_data_;
+  std::unique_ptr<SplittingFilter> splitting_filter_;
+  std::vector<std::unique_ptr<PushSincResampler>> input_resamplers_;
+  std::vector<std::unique_ptr<PushSincResampler>> output_resamplers_;
+  bool downmix_by_averaging_ = true;
+  size_t channel_for_downmixing_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_buffer_gn/moz.build
new file mode 100644
index 0000000000..2291d2981d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_buffer_gn/moz.build
@@ -0,0 +1,235 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/audio_buffer.cc", + "/third_party/libwebrtc/modules/audio_processing/splitting_filter.cc", + "/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = 
True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_buffer_unittest.cc new file mode 100644 index 0000000000..f3b2ddc689 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_buffer_unittest.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/audio_buffer.h"
+
+#include <cmath>
+
+#include "test/gtest.h"
+#include "test/testsupport/rtc_expect_death.h"
+
+namespace webrtc {
+
+namespace {
+
+const size_t kSampleRateHz = 48000u;
+const size_t kStereo = 2u;
+const size_t kMono = 1u;
+
+void ExpectNumChannels(const AudioBuffer& ab, size_t num_channels) {
+  EXPECT_EQ(ab.num_channels(), num_channels);
+}
+
+}  // namespace
+
+TEST(AudioBufferTest, SetNumChannelsSetsChannelBuffersNumChannels) {
+  AudioBuffer ab(kSampleRateHz, kStereo, kSampleRateHz, kStereo, kSampleRateHz,
+                 kStereo);
+  ExpectNumChannels(ab, kStereo);
+  ab.set_num_channels(1);
+  ExpectNumChannels(ab, kMono);
+  ab.RestoreNumChannels();
+  ExpectNumChannels(ab, kStereo);
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+TEST(AudioBufferDeathTest, SetNumChannelsDeathTest) {
+  AudioBuffer ab(kSampleRateHz, kMono, kSampleRateHz, kMono, kSampleRateHz,
+                 kMono);
+  RTC_EXPECT_DEATH(ab.set_num_channels(kStereo), "num_channels");
+}
+#endif
+
+TEST(AudioBufferTest, CopyWithoutResampling) {
+  AudioBuffer ab1(32000, 2, 32000, 2, 32000, 2);
+  AudioBuffer ab2(32000, 2, 32000, 2, 32000, 2);
+  // Fill the first buffer.
+  for (size_t ch = 0; ch < ab1.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab1.num_frames(); ++i) {
+      ab1.channels()[ch][i] = i + ch;
+    }
+  }
+  // Copy to the second buffer.
+  ab1.CopyTo(&ab2);
+  // Verify the content of the second buffer.
+  for (size_t ch = 0; ch < ab2.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab2.num_frames(); ++i) {
+      EXPECT_EQ(ab2.channels()[ch][i], i + ch);
+    }
+  }
+}
+
+TEST(AudioBufferTest, CopyWithResampling) {
+  AudioBuffer ab1(32000, 2, 32000, 2, 48000, 2);
+  AudioBuffer ab2(48000, 2, 48000, 2, 48000, 2);
+  float energy_ab1 = 0.f;
+  float energy_ab2 = 0.f;
+  const float pi = std::acos(-1.f);
+  // Fill the first buffer with a sine wave and compute its energy.
+  for (size_t ch = 0; ch < ab1.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab1.num_frames(); ++i) {
+      ab1.channels()[ch][i] = std::sin(2 * pi * 100.f / 32000.f * i);
+      energy_ab1 += ab1.channels()[ch][i] * ab1.channels()[ch][i];
+    }
+  }
+  // Copy to the second buffer.
+  ab1.CopyTo(&ab2);
+  // Compute the energy of the second buffer.
+  for (size_t ch = 0; ch < ab2.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab2.num_frames(); ++i) {
+      energy_ab2 += ab2.channels()[ch][i] * ab2.channels()[ch][i];
+    }
+  }
+  // Verify that the energies match; the energy scales with the number of
+  // samples, hence the 32000/48000 factor.
+  EXPECT_NEAR(energy_ab1, energy_ab2 * 32000.f / 48000.f, .01f * energy_ab1);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_frame_proxies_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_frame_proxies_gn/moz.build
new file mode 100644
index 0000000000..683d86e743
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_frame_proxies_gn/moz.build
@@ -0,0 +1,225 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + 
DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_frame_proxies_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_frame_view_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_frame_view_gn/moz.build new file mode 100644 index 0000000000..9b90bca379 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_frame_view_gn/moz.build @@ -0,0 +1,205 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_frame_view_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_frame_view_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_frame_view_unittest.cc new file mode 100644 index 0000000000..fd25bc3b0b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_frame_view_unittest.cc @@ -0,0 +1,51 @@ +/* + * Copyright 2018 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/include/audio_frame_view.h" + +#include "modules/audio_processing/audio_buffer.h" +#include "test/gtest.h" + +namespace webrtc { +TEST(AudioFrameTest, ConstructFromAudioBuffer) { + constexpr int kSampleRateHz = 48000; + constexpr int kNumChannels = 2; + constexpr float kFloatConstant = 1272.f; + constexpr float kIntConstant = 17252; + const webrtc::StreamConfig stream_config(kSampleRateHz, kNumChannels); + webrtc::AudioBuffer buffer( + stream_config.sample_rate_hz(), stream_config.num_channels(), + stream_config.sample_rate_hz(), stream_config.num_channels(), + stream_config.sample_rate_hz(), stream_config.num_channels()); + + AudioFrameView non_const_view(buffer.channels(), buffer.num_channels(), + buffer.num_frames()); + // Modification is allowed. + non_const_view.channel(0)[0] = kFloatConstant; + EXPECT_EQ(buffer.channels()[0][0], kFloatConstant); + + AudioFrameView const_view( + buffer.channels(), buffer.num_channels(), buffer.num_frames()); + // Modification is not allowed. 
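The test above pins down AudioFrameView's const-conversion semantics through an AudioBuffer. As a minimal editorial sketch (not part of the upstream patch), the same view type can wrap plain channel arrays; this assumes only the AudioFrameView<T>(T* const*, int, int) constructor and channel() accessor that the test itself exercises, and SketchRawChannelUsage is a hypothetical name:

    #include "modules/audio_processing/include/audio_frame_view.h"

    namespace webrtc {
    void SketchRawChannelUsage() {  // Hypothetical helper, for illustration.
      float left[480] = {};   // One 10 ms channel at 48 kHz.
      float right[480] = {};
      float* channels[] = {left, right};
      AudioFrameView<float> view(channels, /*num_channels=*/2,
                                 /*channel_size=*/480);
      view.channel(0)[0] = 1.0f;  // Mutable through the view.
      AudioFrameView<const float> read_only = view;  // Const conversion is
      static_cast<void>(read_only);                  // one-way, as tested above.
    }
    }  // namespace webrtc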
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc
new file mode 100644
index 0000000000..a246448c26
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+
+#include "api/make_ref_counted.h"
+#include "modules/audio_processing/audio_processing_impl.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+AudioProcessingBuilder::AudioProcessingBuilder() = default;
+AudioProcessingBuilder::~AudioProcessingBuilder() = default;
+
+rtc::scoped_refptr<AudioProcessing> AudioProcessingBuilder::Create() {
+#ifdef WEBRTC_EXCLUDE_AUDIO_PROCESSING_MODULE
+  // Return a null pointer when the APM is excluded from the build.
+  return nullptr;
+#else  // WEBRTC_EXCLUDE_AUDIO_PROCESSING_MODULE
+  return rtc::make_ref_counted<AudioProcessingImpl>(
+      config_, std::move(capture_post_processing_),
+      std::move(render_pre_processing_), std::move(echo_control_factory_),
+      std::move(echo_detector_), std::move(capture_analyzer_));
+#endif
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_processing_gn/moz.build
new file mode 100644
index 0000000000..ab0ca7113f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_gn/moz.build
@@ -0,0 +1,239 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc", + "/third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/audio_processing_builder_impl.cc", + "/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] 
= "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_processing_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc new file mode 100644 index 0000000000..c304453388 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.cc @@ -0,0 +1,2649 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/audio_processing_impl.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/audio_frame.h"
+#include "common_audio/audio_converter.h"
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/aec_dump/aec_dump_factory.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/optionally_built_submodule_creators.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/experiments/field_trial_parser.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/time_utils.h"
+#include "rtc_base/trace_event.h"
+#include "system_wrappers/include/denormal_disabler.h"
+#include "system_wrappers/include/field_trial.h"
+#include "system_wrappers/include/metrics.h"
+
+#define RETURN_ON_ERR(expr) \
+  do {                      \
+    int err = (expr);       \
+    if (err != kNoError) {  \
+      return err;           \
+    }                       \
+  } while (0)
+
+namespace webrtc {
+
+namespace {
+
+bool SampleRateSupportsMultiBand(int sample_rate_hz) {
+  return sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
+         sample_rate_hz == AudioProcessing::kSampleRate48kHz;
+}
+
+// Checks whether the high-pass filter should be done in the full-band.
+bool EnforceSplitBandHpf() {
+  return field_trial::IsEnabled("WebRTC-FullBandHpfKillSwitch");
+}
+
+// Checks whether AEC3 should be allowed to decide what the default
+// configuration should be, based on the render and capture channel
+// configuration at hand.
+bool UseSetupSpecificDefaultAec3Config() {
+  return !field_trial::IsEnabled(
+      "WebRTC-Aec3SetupSpecificDefaultConfigDefaultsKillSwitch");
+}
+
+// Identifies the native processing rate that best handles a sample rate.
+int SuitableProcessRate(int minimum_rate,
+                        int max_splitting_rate,
+                        bool band_splitting_required) {
+  const int uppermost_native_rate =
+      band_splitting_required ? max_splitting_rate : 48000;
+  for (auto rate : {16000, 32000, 48000}) {
+    if (rate >= uppermost_native_rate) {
+      return uppermost_native_rate;
+    }
+    if (rate >= minimum_rate) {
+      return rate;
+    }
+  }
+  RTC_DCHECK_NOTREACHED();
+  return uppermost_native_rate;
+}
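+
+// For example: with minimum_rate = 22050, max_splitting_rate = 48000 and
+// band splitting required, the loop above skips 16000 (below the minimum)
+// and returns 32000, the lowest native rate that can carry the signal; with
+// minimum_rate = 48000 it falls through to the uppermost native rate and
+// returns 48000.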
+
+GainControl::Mode Agc1ConfigModeToInterfaceMode(
+    AudioProcessing::Config::GainController1::Mode mode) {
+  using Agc1Config = AudioProcessing::Config::GainController1;
+  switch (mode) {
+    case Agc1Config::kAdaptiveAnalog:
+      return GainControl::kAdaptiveAnalog;
+    case Agc1Config::kAdaptiveDigital:
+      return GainControl::kAdaptiveDigital;
+    case Agc1Config::kFixedDigital:
+      return GainControl::kFixedDigital;
+  }
+  RTC_CHECK_NOTREACHED();
+}
+
+bool MinimizeProcessingForUnusedOutput() {
+  return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch");
+}
+
+// Maximum lengths that frames of samples being passed from the render side to
+// the capture side can have (does not apply to AEC3).
+static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
+static const size_t kMaxAllowedValuesOfSamplesPerFrame = 480;
+
+// Maximum number of frames to buffer in the render queue.
+// TODO(peah): Decrease this once we properly handle hugely unbalanced
+// reverse and forward call numbers.
+static const size_t kMaxNumFramesToBuffer = 100;
+
+void PackRenderAudioBufferForEchoDetector(const AudioBuffer& audio,
+                                          std::vector<float>& packed_buffer) {
+  packed_buffer.clear();
+  packed_buffer.insert(packed_buffer.end(), audio.channels_const()[0],
+                       audio.channels_const()[0] + audio.num_frames());
+}
+
+// Options for gracefully handling processing errors.
+enum class FormatErrorOutputOption {
+  kOutputExactCopyOfInput,
+  kOutputBroadcastCopyOfFirstInputChannel,
+  kOutputSilence,
+  kDoNothing
+};
+
+enum class AudioFormatValidity {
+  // Format is supported by APM.
+  kValidAndSupported,
+  // Format has a reasonable interpretation but is not supported.
+  kValidButUnsupportedSampleRate,
+  // The remaining enum values signal that the audio does not have a
+  // reasonable interpretation and cannot be used.
+  kInvalidSampleRate,
+  kInvalidChannelCount
+};
+
+AudioFormatValidity ValidateAudioFormat(const StreamConfig& config) {
+  if (config.sample_rate_hz() < 0)
+    return AudioFormatValidity::kInvalidSampleRate;
+  if (config.num_channels() == 0)
+    return AudioFormatValidity::kInvalidChannelCount;
+
+  // Format has a reasonable interpretation, but may still be unsupported.
+  if (config.sample_rate_hz() < 8000 ||
+      config.sample_rate_hz() > AudioBuffer::kMaxSampleRate)
+    return AudioFormatValidity::kValidButUnsupportedSampleRate;
+
+  // Format is fully supported.
+  return AudioFormatValidity::kValidAndSupported;
+}
+
+int AudioFormatValidityToErrorCode(AudioFormatValidity validity) {
+  switch (validity) {
+    case AudioFormatValidity::kValidAndSupported:
+      return AudioProcessing::kNoError;
+    case AudioFormatValidity::kValidButUnsupportedSampleRate:  // fall-through
+    case AudioFormatValidity::kInvalidSampleRate:
+      return AudioProcessing::kBadSampleRateError;
+    case AudioFormatValidity::kInvalidChannelCount:
+      return AudioProcessing::kBadNumberChannelsError;
+  }
+  RTC_DCHECK(false);
+}
+
+// Returns an AudioProcessing::Error together with the best possible option
+// for output audio content.
+std::pair<int, FormatErrorOutputOption> ChooseErrorOutputOption(
+    const StreamConfig& input_config,
+    const StreamConfig& output_config) {
+  AudioFormatValidity input_validity = ValidateAudioFormat(input_config);
+  AudioFormatValidity output_validity = ValidateAudioFormat(output_config);
+
+  if (input_validity == AudioFormatValidity::kValidAndSupported &&
+      output_validity == AudioFormatValidity::kValidAndSupported &&
+      (output_config.num_channels() == 1 ||
+       output_config.num_channels() == input_config.num_channels())) {
+    return {AudioProcessing::kNoError, FormatErrorOutputOption::kDoNothing};
+  }
+
+  int error_code = AudioFormatValidityToErrorCode(input_validity);
+  if (error_code == AudioProcessing::kNoError) {
+    error_code = AudioFormatValidityToErrorCode(output_validity);
+  }
+  if (error_code == AudioProcessing::kNoError) {
+    // The individual formats are valid but there is some error - must be
+    // channel mismatch.
+    error_code = AudioProcessing::kBadNumberChannelsError;
+  }
+
+  FormatErrorOutputOption output_option;
+  if (output_validity != AudioFormatValidity::kValidAndSupported &&
+      output_validity != AudioFormatValidity::kValidButUnsupportedSampleRate) {
+    // The output format is uninterpretable: cannot do anything.
+    output_option = FormatErrorOutputOption::kDoNothing;
+  } else if (input_validity != AudioFormatValidity::kValidAndSupported &&
+             input_validity !=
+                 AudioFormatValidity::kValidButUnsupportedSampleRate) {
+    // The input format is uninterpretable: cannot use it, must output
+    // silence.
+ output_option = FormatErrorOutputOption::kOutputSilence; + } else if (input_config.sample_rate_hz() != output_config.sample_rate_hz()) { + // Sample rates do not match: Cannot copy input into output, output silence. + // Note: If the sample rates are in a supported range, we could resample. + // However, that would significantly increase complexity of this error + // handling code. + output_option = FormatErrorOutputOption::kOutputSilence; + } else if (input_config.num_channels() != output_config.num_channels()) { + // Channel counts do not match: We cannot easily map input channels to + // output channels. + output_option = + FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel; + } else { + // The formats match exactly. + RTC_DCHECK(input_config == output_config); + output_option = FormatErrorOutputOption::kOutputExactCopyOfInput; + } + return std::make_pair(error_code, output_option); +} + +// Checks if the audio format is supported. If not, the output is populated in a +// best-effort manner and an APM error code is returned. +int HandleUnsupportedAudioFormats(const int16_t* const src, + const StreamConfig& input_config, + const StreamConfig& output_config, + int16_t* const dest) { + RTC_DCHECK(src); + RTC_DCHECK(dest); + + auto [error_code, output_option] = + ChooseErrorOutputOption(input_config, output_config); + if (error_code == AudioProcessing::kNoError) + return AudioProcessing::kNoError; + + const size_t num_output_channels = output_config.num_channels(); + switch (output_option) { + case FormatErrorOutputOption::kOutputSilence: + memset(dest, 0, output_config.num_samples() * sizeof(int16_t)); + break; + case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel: + for (size_t i = 0; i < output_config.num_frames(); ++i) { + int16_t sample = src[input_config.num_channels() * i]; + for (size_t ch = 0; ch < num_output_channels; ++ch) { + dest[ch + num_output_channels * i] = sample; + } + } + break; + case FormatErrorOutputOption::kOutputExactCopyOfInput: + memcpy(dest, src, output_config.num_samples() * sizeof(int16_t)); + break; + case FormatErrorOutputOption::kDoNothing: + break; + } + return error_code; +} + +// Checks if the audio format is supported. If not, the output is populated in a +// best-effort manner and an APM error code is returned. 
+int HandleUnsupportedAudioFormats(const float* const* src,
+                                  const StreamConfig& input_config,
+                                  const StreamConfig& output_config,
+                                  float* const* dest) {
+  RTC_DCHECK(src);
+  RTC_DCHECK(dest);
+  for (size_t i = 0; i < input_config.num_channels(); ++i) {
+    RTC_DCHECK(src[i]);
+  }
+  for (size_t i = 0; i < output_config.num_channels(); ++i) {
+    RTC_DCHECK(dest[i]);
+  }
+
+  auto [error_code, output_option] =
+      ChooseErrorOutputOption(input_config, output_config);
+  if (error_code == AudioProcessing::kNoError)
+    return AudioProcessing::kNoError;
+
+  const size_t num_output_channels = output_config.num_channels();
+  switch (output_option) {
+    case FormatErrorOutputOption::kOutputSilence:
+      for (size_t ch = 0; ch < num_output_channels; ++ch) {
+        memset(dest[ch], 0, output_config.num_frames() * sizeof(float));
+      }
+      break;
+    case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel:
+      for (size_t ch = 0; ch < num_output_channels; ++ch) {
+        memcpy(dest[ch], src[0], output_config.num_frames() * sizeof(float));
+      }
+      break;
+    case FormatErrorOutputOption::kOutputExactCopyOfInput:
+      for (size_t ch = 0; ch < num_output_channels; ++ch) {
+        memcpy(dest[ch], src[ch], output_config.num_frames() * sizeof(float));
+      }
+      break;
+    case FormatErrorOutputOption::kDoNothing:
+      break;
+  }
+  return error_code;
+}
+
+using DownmixMethod = AudioProcessing::Config::Pipeline::DownmixMethod;
+
+void SetDownmixMethod(AudioBuffer& buffer, DownmixMethod method) {
+  switch (method) {
+    case DownmixMethod::kAverageChannels:
+      buffer.set_downmixing_by_averaging();
+      break;
+    case DownmixMethod::kUseFirstChannel:
+      buffer.set_downmixing_to_specific_channel(/*channel=*/0);
+      break;
+  }
+}
+
+constexpr int kUnspecifiedDataDumpInputVolume = -100;
+
+}  // namespace
+
+// Throughout webrtc, it's assumed that success is represented by zero.
+static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
+
+absl::optional<AudioProcessingImpl::GainController2ExperimentParams>
+AudioProcessingImpl::GetGainController2ExperimentParams() {
+  constexpr char kFieldTrialName[] = "WebRTC-Audio-GainController2";
+
+  if (!field_trial::IsEnabled(kFieldTrialName)) {
+    return absl::nullopt;
+  }
+
+  FieldTrialFlag enabled("Enabled", false);
+
+  // Whether the gain control should switch to AGC2. Enabled by default.
+  FieldTrialParameter<bool> switch_to_agc2("switch_to_agc2", true);
+
+  // AGC2 input volume controller configuration.
+  constexpr InputVolumeController::Config kDefaultInputVolumeControllerConfig;
+  FieldTrialConstrained<int> min_input_volume(
+      "min_input_volume", kDefaultInputVolumeControllerConfig.min_input_volume,
+      0, 255);
+  FieldTrialConstrained<int> clipped_level_min(
+      "clipped_level_min",
+      kDefaultInputVolumeControllerConfig.clipped_level_min, 0, 255);
+  FieldTrialConstrained<int> clipped_level_step(
+      "clipped_level_step",
+      kDefaultInputVolumeControllerConfig.clipped_level_step, 0, 255);
+  FieldTrialConstrained<double> clipped_ratio_threshold(
+      "clipped_ratio_threshold",
+      kDefaultInputVolumeControllerConfig.clipped_ratio_threshold, 0, 1);
+  FieldTrialConstrained<int> clipped_wait_frames(
+      "clipped_wait_frames",
+      kDefaultInputVolumeControllerConfig.clipped_wait_frames, 0,
+      absl::nullopt);
+  FieldTrialParameter<bool> enable_clipping_predictor(
+      "enable_clipping_predictor",
+      kDefaultInputVolumeControllerConfig.enable_clipping_predictor);
+  FieldTrialConstrained<int> target_range_max_dbfs(
+      "target_range_max_dbfs",
+      kDefaultInputVolumeControllerConfig.target_range_max_dbfs, -90, 30);
+  FieldTrialConstrained<int> target_range_min_dbfs(
+      "target_range_min_dbfs",
+      kDefaultInputVolumeControllerConfig.target_range_min_dbfs, -90, 30);
+  FieldTrialConstrained<int> update_input_volume_wait_frames(
+      "update_input_volume_wait_frames",
+      kDefaultInputVolumeControllerConfig.update_input_volume_wait_frames, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> speech_probability_threshold(
+      "speech_probability_threshold",
+      kDefaultInputVolumeControllerConfig.speech_probability_threshold, 0, 1);
+  FieldTrialConstrained<double> speech_ratio_threshold(
+      "speech_ratio_threshold",
+      kDefaultInputVolumeControllerConfig.speech_ratio_threshold, 0, 1);
+
+  // AGC2 adaptive digital controller configuration.
+  constexpr AudioProcessing::Config::GainController2::AdaptiveDigital
+      kDefaultAdaptiveDigitalConfig;
+  FieldTrialConstrained<double> headroom_db(
+      "headroom_db", kDefaultAdaptiveDigitalConfig.headroom_db, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> max_gain_db(
+      "max_gain_db", kDefaultAdaptiveDigitalConfig.max_gain_db, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> initial_gain_db(
+      "initial_gain_db", kDefaultAdaptiveDigitalConfig.initial_gain_db, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> max_gain_change_db_per_second(
+      "max_gain_change_db_per_second",
+      kDefaultAdaptiveDigitalConfig.max_gain_change_db_per_second, 0,
+      absl::nullopt);
+  FieldTrialConstrained<double> max_output_noise_level_dbfs(
+      "max_output_noise_level_dbfs",
+      kDefaultAdaptiveDigitalConfig.max_output_noise_level_dbfs, absl::nullopt,
+      0);
+
+  // Transient suppressor.
+  FieldTrialParameter<bool> disallow_transient_suppressor_usage(
+      "disallow_transient_suppressor_usage", false);
+
+  // Field-trial based override for the input volume controller and adaptive
+  // digital configs.
+  ParseFieldTrial(
+      {&enabled, &switch_to_agc2, &min_input_volume, &clipped_level_min,
+       &clipped_level_step, &clipped_ratio_threshold, &clipped_wait_frames,
+       &enable_clipping_predictor, &target_range_max_dbfs,
+       &target_range_min_dbfs, &update_input_volume_wait_frames,
+       &speech_probability_threshold, &speech_ratio_threshold, &headroom_db,
+       &max_gain_db, &initial_gain_db, &max_gain_change_db_per_second,
+       &max_output_noise_level_dbfs, &disallow_transient_suppressor_usage},
+      field_trial::FindFullName(kFieldTrialName));
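+
+  // For instance, a field-trial string such as
+  // "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true,headroom_db:5/"
+  // enables this trial and overrides those two parameters; parameters not
+  // listed keep the defaults defined above (ParseFieldTrial syntax:
+  // comma-separated key:value pairs).
+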
+  // Checked already by `IsEnabled()` before parsing, therefore always true.
+  RTC_DCHECK(enabled);
+
+  const bool do_not_change_agc_config = !switch_to_agc2.Get();
+  if (do_not_change_agc_config && !disallow_transient_suppressor_usage.Get()) {
+    // Return an unspecified value since, in this case, both the AGC2 and TS
+    // configurations won't be adjusted.
+    return absl::nullopt;
+  }
+  using Params = AudioProcessingImpl::GainController2ExperimentParams;
+  if (do_not_change_agc_config) {
+    // Return a value that leaves the AGC2 config unchanged and that always
+    // disables TS.
+    return Params{.agc2_config = absl::nullopt,
+                  .disallow_transient_suppressor_usage = true};
+  }
+  // Return a value that switches all the gain control to AGC2.
+  return Params{
+      .agc2_config =
+          Params::Agc2Config{
+              .input_volume_controller =
+                  {
+                      .min_input_volume = min_input_volume.Get(),
+                      .clipped_level_min = clipped_level_min.Get(),
+                      .clipped_level_step = clipped_level_step.Get(),
+                      .clipped_ratio_threshold =
+                          static_cast<float>(clipped_ratio_threshold.Get()),
+                      .clipped_wait_frames = clipped_wait_frames.Get(),
+                      .enable_clipping_predictor =
+                          enable_clipping_predictor.Get(),
+                      .target_range_max_dbfs = target_range_max_dbfs.Get(),
+                      .target_range_min_dbfs = target_range_min_dbfs.Get(),
+                      .update_input_volume_wait_frames =
+                          update_input_volume_wait_frames.Get(),
+                      .speech_probability_threshold = static_cast<float>(
+                          speech_probability_threshold.Get()),
+                      .speech_ratio_threshold =
+                          static_cast<float>(speech_ratio_threshold.Get()),
+                  },
+              .adaptive_digital_controller =
+                  {
+                      .enabled = false,
+                      .headroom_db = static_cast<float>(headroom_db.Get()),
+                      .max_gain_db = static_cast<float>(max_gain_db.Get()),
+                      .initial_gain_db =
+                          static_cast<float>(initial_gain_db.Get()),
+                      .max_gain_change_db_per_second = static_cast<float>(
+                          max_gain_change_db_per_second.Get()),
+                      .max_output_noise_level_dbfs = static_cast<float>(
+                          max_output_noise_level_dbfs.Get()),
+                  }},
+      .disallow_transient_suppressor_usage =
+          disallow_transient_suppressor_usage.Get()};
+}
+
+AudioProcessing::Config AudioProcessingImpl::AdjustConfig(
+    const AudioProcessing::Config& config,
+    const absl::optional<AudioProcessingImpl::GainController2ExperimentParams>&
+        experiment_params) {
+  if (!experiment_params.has_value() ||
+      (!experiment_params->agc2_config.has_value() &&
+       !experiment_params->disallow_transient_suppressor_usage)) {
+    // When the experiment parameters are unspecified or when the AGC and TS
+    // configurations are not overridden, return the unmodified configuration.
+    return config;
+  }
+
+  AudioProcessing::Config adjusted_config = config;
+
+  // Override the transient suppressor configuration.
+  if (experiment_params->disallow_transient_suppressor_usage) {
+    adjusted_config.transient_suppression.enabled = false;
+  }
+
+  // Override the auto gain control configuration if the AGC1 analog gain
+  // controller is active and `experiment_params->agc2_config` is specified.
+  const bool agc1_analog_enabled =
+      config.gain_controller1.enabled &&
+      (config.gain_controller1.mode ==
+           AudioProcessing::Config::GainController1::kAdaptiveAnalog ||
+       config.gain_controller1.analog_gain_controller.enabled);
+  if (agc1_analog_enabled && experiment_params->agc2_config.has_value()) {
+    // Check that the unadjusted AGC config meets the preconditions.
+    const bool hybrid_agc_config_detected =
+        config.gain_controller1.enabled &&
+        config.gain_controller1.analog_gain_controller.enabled &&
+        !config.gain_controller1.analog_gain_controller
+             .enable_digital_adaptive &&
+        config.gain_controller2.enabled &&
+        config.gain_controller2.adaptive_digital.enabled;
+    const bool full_agc1_config_detected =
+        config.gain_controller1.enabled &&
+        config.gain_controller1.analog_gain_controller.enabled &&
+        config.gain_controller1.analog_gain_controller
+            .enable_digital_adaptive &&
+        !config.gain_controller2.enabled;
+    const bool one_and_only_one_input_volume_controller =
+        hybrid_agc_config_detected != full_agc1_config_detected;
+    const bool agc2_input_volume_controller_enabled =
+        config.gain_controller2.enabled &&
+        config.gain_controller2.input_volume_controller.enabled;
+    if (!one_and_only_one_input_volume_controller ||
+        agc2_input_volume_controller_enabled) {
+      RTC_LOG(LS_ERROR) << "Cannot adjust AGC config (precondition failed)";
+      if (!one_and_only_one_input_volume_controller)
+        RTC_LOG(LS_ERROR)
+            << "One and only one input volume controller must be enabled.";
+      if (agc2_input_volume_controller_enabled)
+        RTC_LOG(LS_ERROR)
+            << "The AGC2 input volume controller must be disabled.";
+    } else {
+      adjusted_config.gain_controller1.enabled = false;
+      adjusted_config.gain_controller1.analog_gain_controller.enabled = false;
+
+      adjusted_config.gain_controller2.enabled = true;
+      adjusted_config.gain_controller2.input_volume_controller.enabled = true;
+      adjusted_config.gain_controller2.adaptive_digital =
+          experiment_params->agc2_config->adaptive_digital_controller;
+      adjusted_config.gain_controller2.adaptive_digital.enabled = true;
+    }
+  }
+
+  return adjusted_config;
+}
+
+bool AudioProcessingImpl::UseApmVadSubModule(
+    const AudioProcessing::Config& config,
+    const absl::optional<AudioProcessingImpl::GainController2ExperimentParams>&
+        experiment_params) {
+  // The VAD as an APM sub-module is needed only in one case: when TS and AGC2
+  // are both enabled, and the AGC2 experiment is running with parameters that
+  // require fully switching the gain control to AGC2.
+  return config.transient_suppression.enabled &&
+         config.gain_controller2.enabled &&
+         (config.gain_controller2.input_volume_controller.enabled ||
+          config.gain_controller2.adaptive_digital.enabled) &&
+         experiment_params.has_value() &&
+         experiment_params->agc2_config.has_value();
+}
+
+AudioProcessingImpl::SubmoduleStates::SubmoduleStates(
+    bool capture_post_processor_enabled,
+    bool render_pre_processor_enabled,
+    bool capture_analyzer_enabled)
+    : capture_post_processor_enabled_(capture_post_processor_enabled),
+      render_pre_processor_enabled_(render_pre_processor_enabled),
+      capture_analyzer_enabled_(capture_analyzer_enabled) {}
+
+bool AudioProcessingImpl::SubmoduleStates::Update(
+    bool high_pass_filter_enabled,
+    bool mobile_echo_controller_enabled,
+    bool noise_suppressor_enabled,
+    bool adaptive_gain_controller_enabled,
+    bool gain_controller2_enabled,
+    bool voice_activity_detector_enabled,
+    bool gain_adjustment_enabled,
+    bool echo_controller_enabled,
+    bool transient_suppressor_enabled) {
+  bool changed = false;
+  changed |= (high_pass_filter_enabled != high_pass_filter_enabled_);
+  changed |=
+      (mobile_echo_controller_enabled != mobile_echo_controller_enabled_);
+  changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
+  changed |=
+      (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
+  changed |= (gain_controller2_enabled != gain_controller2_enabled_);
+  changed |=
+      (voice_activity_detector_enabled != voice_activity_detector_enabled_);
+  changed |= (gain_adjustment_enabled != gain_adjustment_enabled_);
+  changed |= (echo_controller_enabled != echo_controller_enabled_);
+  changed |= (transient_suppressor_enabled != transient_suppressor_enabled_);
+  if (changed) {
+    high_pass_filter_enabled_ = high_pass_filter_enabled;
+    mobile_echo_controller_enabled_ = mobile_echo_controller_enabled;
+    noise_suppressor_enabled_ = noise_suppressor_enabled;
+    adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
+    gain_controller2_enabled_ = gain_controller2_enabled;
+    voice_activity_detector_enabled_ = voice_activity_detector_enabled;
+    gain_adjustment_enabled_ = gain_adjustment_enabled;
+    echo_controller_enabled_ = echo_controller_enabled;
+    transient_suppressor_enabled_ = transient_suppressor_enabled;
+  }
+
+  changed |= first_update_;
+  first_update_ = false;
+  return changed;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandSubModulesActive()
+    const {
+  return CaptureMultiBandProcessingPresent();
+}
+
+bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingPresent()
+    const {
+  // If echo controller is present, assume it performs active processing.
+  return CaptureMultiBandProcessingActive(/*ec_processing_active=*/true);
+}
+
+bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingActive(
+    bool ec_processing_active) const {
+  return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
+         noise_suppressor_enabled_ || adaptive_gain_controller_enabled_ ||
+         (echo_controller_enabled_ && ec_processing_active);
+}
+
+bool AudioProcessingImpl::SubmoduleStates::CaptureFullBandProcessingActive()
+    const {
+  return gain_controller2_enabled_ || capture_post_processor_enabled_ ||
+         gain_adjustment_enabled_;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::CaptureAnalyzerActive() const {
+  return capture_analyzer_enabled_;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandSubModulesActive()
+    const {
+  return RenderMultiBandProcessingActive() ||
+         mobile_echo_controller_enabled_ ||
+         adaptive_gain_controller_enabled_ || echo_controller_enabled_;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::RenderFullBandProcessingActive()
+    const {
+  return render_pre_processor_enabled_;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandProcessingActive()
+    const {
+  return false;
+}
+
+bool AudioProcessingImpl::SubmoduleStates::HighPassFilteringRequired() const {
+  return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
+         noise_suppressor_enabled_;
+}
+
+AudioProcessingImpl::AudioProcessingImpl()
+    : AudioProcessingImpl(/*config=*/{},
+                          /*capture_post_processor=*/nullptr,
+                          /*render_pre_processor=*/nullptr,
+                          /*echo_control_factory=*/nullptr,
+                          /*echo_detector=*/nullptr,
+                          /*capture_analyzer=*/nullptr) {}
+
+std::atomic<int> AudioProcessingImpl::instance_count_(0);
+
+AudioProcessingImpl::AudioProcessingImpl(
+    const AudioProcessing::Config& config,
+    std::unique_ptr<CustomProcessing> capture_post_processor,
+    std::unique_ptr<CustomProcessing> render_pre_processor,
+    std::unique_ptr<EchoControlFactory> echo_control_factory,
+    rtc::scoped_refptr<EchoDetector> echo_detector,
+    std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      use_setup_specific_default_aec3_config_(
+          UseSetupSpecificDefaultAec3Config()),
+      gain_controller2_experiment_params_(
+          GetGainController2ExperimentParams()),
+      transient_suppressor_vad_mode_(TransientSuppressor::VadMode::kDefault),
+      capture_runtime_settings_(RuntimeSettingQueueSize()),
+      render_runtime_settings_(RuntimeSettingQueueSize()),
+      capture_runtime_settings_enqueuer_(&capture_runtime_settings_),
+      render_runtime_settings_enqueuer_(&render_runtime_settings_),
+      echo_control_factory_(std::move(echo_control_factory)),
+      config_(AdjustConfig(config, gain_controller2_experiment_params_)),
+      submodule_states_(!!capture_post_processor,
+                        !!render_pre_processor,
+                        !!capture_analyzer),
+      submodules_(std::move(capture_post_processor),
+                  std::move(render_pre_processor),
+                  std::move(echo_detector),
+                  std::move(capture_analyzer)),
+      constants_(!field_trial::IsEnabled(
+                     "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
+                 !field_trial::IsEnabled(
+                     "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
+                 EnforceSplitBandHpf(),
+                 MinimizeProcessingForUnusedOutput(),
+                 field_trial::IsEnabled(
+                     "WebRTC-TransientSuppressorForcedOff")),
+      capture_(),
+      capture_nonlocked_(),
+      applied_input_volume_stats_reporter_(
+          InputVolumeStatsReporter::InputVolumeType::kApplied),
+      recommended_input_volume_stats_reporter_(
+          InputVolumeStatsReporter::InputVolumeType::kRecommended) {
+  RTC_LOG(LS_INFO) << "Injected APM submodules:"
+                      "\nEcho control factory: "
+                   << !!echo_control_factory_
+                   << "\nEcho detector: " << !!submodules_.echo_detector
+                   << "\nCapture analyzer: " << !!submodules_.capture_analyzer
+                   << "\nCapture post processor: "
+                   << !!submodules_.capture_post_processor
+                   << "\nRender pre processor: "
+                   << !!submodules_.render_pre_processor;
+  if (!DenormalDisabler::IsSupported()) {
+    RTC_LOG(LS_INFO) << "Denormal disabler unsupported";
+  }
+
+  RTC_LOG(LS_INFO) << "AudioProcessing: " << config_.ToString();
+
+  // Mark Echo Controller enabled if a factory is injected.
+  capture_nonlocked_.echo_controller_enabled =
+      static_cast<bool>(echo_control_factory_);
+
+  Initialize();
+}
+
+AudioProcessingImpl::~AudioProcessingImpl() = default;
+
+int AudioProcessingImpl::Initialize() {
+  // Run in a single-threaded manner during initialization.
+  MutexLock lock_render(&mutex_render_);
+  MutexLock lock_capture(&mutex_capture_);
+  InitializeLocked();
+  return kNoError;
+}
+
+int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
+  // Run in a single-threaded manner during initialization.
+  MutexLock lock_render(&mutex_render_);
+  MutexLock lock_capture(&mutex_capture_);
+  InitializeLocked(processing_config);
+  return kNoError;
+}
+
+void AudioProcessingImpl::MaybeInitializeRender(
+    const StreamConfig& input_config,
+    const StreamConfig& output_config) {
+  ProcessingConfig processing_config = formats_.api_format;
+  processing_config.reverse_input_stream() = input_config;
+  processing_config.reverse_output_stream() = output_config;
+
+  if (processing_config == formats_.api_format) {
+    return;
+  }
+
+  MutexLock lock_capture(&mutex_capture_);
+  InitializeLocked(processing_config);
+}
+
+void AudioProcessingImpl::InitializeLocked() {
+  UpdateActiveSubmoduleStates();
+
+  const int render_audiobuffer_sample_rate_hz =
+      formats_.api_format.reverse_output_stream().num_frames() == 0
+          ? 
formats_.render_processing_format.sample_rate_hz() + : formats_.api_format.reverse_output_stream().sample_rate_hz(); + if (formats_.api_format.reverse_input_stream().num_channels() > 0) { + render_.render_audio.reset(new AudioBuffer( + formats_.api_format.reverse_input_stream().sample_rate_hz(), + formats_.api_format.reverse_input_stream().num_channels(), + formats_.render_processing_format.sample_rate_hz(), + formats_.render_processing_format.num_channels(), + render_audiobuffer_sample_rate_hz, + formats_.render_processing_format.num_channels())); + if (formats_.api_format.reverse_input_stream() != + formats_.api_format.reverse_output_stream()) { + render_.render_converter = AudioConverter::Create( + formats_.api_format.reverse_input_stream().num_channels(), + formats_.api_format.reverse_input_stream().num_frames(), + formats_.api_format.reverse_output_stream().num_channels(), + formats_.api_format.reverse_output_stream().num_frames()); + } else { + render_.render_converter.reset(nullptr); + } + } else { + render_.render_audio.reset(nullptr); + render_.render_converter.reset(nullptr); + } + + capture_.capture_audio.reset(new AudioBuffer( + formats_.api_format.input_stream().sample_rate_hz(), + formats_.api_format.input_stream().num_channels(), + capture_nonlocked_.capture_processing_format.sample_rate_hz(), + formats_.api_format.output_stream().num_channels(), + formats_.api_format.output_stream().sample_rate_hz(), + formats_.api_format.output_stream().num_channels())); + SetDownmixMethod(*capture_.capture_audio, + config_.pipeline.capture_downmix_method); + + if (capture_nonlocked_.capture_processing_format.sample_rate_hz() < + formats_.api_format.output_stream().sample_rate_hz() && + formats_.api_format.output_stream().sample_rate_hz() == 48000) { + capture_.capture_fullband_audio.reset( + new AudioBuffer(formats_.api_format.input_stream().sample_rate_hz(), + formats_.api_format.input_stream().num_channels(), + formats_.api_format.output_stream().sample_rate_hz(), + formats_.api_format.output_stream().num_channels(), + formats_.api_format.output_stream().sample_rate_hz(), + formats_.api_format.output_stream().num_channels())); + SetDownmixMethod(*capture_.capture_fullband_audio, + config_.pipeline.capture_downmix_method); + } else { + capture_.capture_fullband_audio.reset(); + } + + AllocateRenderQueue(); + + InitializeGainController1(); + InitializeTransientSuppressor(); + InitializeHighPassFilter(true); + InitializeResidualEchoDetector(); + InitializeEchoController(); + InitializeGainController2(); + InitializeVoiceActivityDetector(); + InitializeNoiseSuppressor(); + InitializeAnalyzer(); + InitializePostProcessor(); + InitializePreProcessor(); + InitializeCaptureLevelsAdjuster(); + + if (aec_dump_) { + aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis()); + } +} + +void AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { + UpdateActiveSubmoduleStates(); + + formats_.api_format = config; + + // Choose maximum rate to use for the split filtering. 
+  RTC_DCHECK(config_.pipeline.maximum_internal_processing_rate == 48000 ||
+             config_.pipeline.maximum_internal_processing_rate == 32000);
+  int max_splitting_rate = 48000;
+  if (config_.pipeline.maximum_internal_processing_rate == 32000) {
+    max_splitting_rate = config_.pipeline.maximum_internal_processing_rate;
+  }
+
+  int capture_processing_rate = SuitableProcessRate(
+      std::min(formats_.api_format.input_stream().sample_rate_hz(),
+               formats_.api_format.output_stream().sample_rate_hz()),
+      max_splitting_rate,
+      submodule_states_.CaptureMultiBandSubModulesActive() ||
+          submodule_states_.RenderMultiBandSubModulesActive());
+  RTC_DCHECK_NE(8000, capture_processing_rate);
+
+  capture_nonlocked_.capture_processing_format =
+      StreamConfig(capture_processing_rate);
+
+  int render_processing_rate;
+  if (!capture_nonlocked_.echo_controller_enabled) {
+    render_processing_rate = SuitableProcessRate(
+        std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(),
+                 formats_.api_format.reverse_output_stream().sample_rate_hz()),
+        max_splitting_rate,
+        submodule_states_.CaptureMultiBandSubModulesActive() ||
+            submodule_states_.RenderMultiBandSubModulesActive());
+  } else {
+    render_processing_rate = capture_processing_rate;
+  }
+
+  // If the forward sample rate is 8 kHz, the render stream is also processed
+  // at this rate.
+  if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
+      kSampleRate8kHz) {
+    render_processing_rate = kSampleRate8kHz;
+  } else {
+    render_processing_rate =
+        std::max(render_processing_rate, static_cast<int>(kSampleRate16kHz));
+  }
+
+  RTC_DCHECK_NE(8000, render_processing_rate);
+
+  if (submodule_states_.RenderMultiBandSubModulesActive()) {
+    // By default, downmix the render stream to mono for analysis. This has
+    // been demonstrated to work well for AEC in most practical scenarios.
+    const bool multi_channel_render = config_.pipeline.multi_channel_render &&
+                                      constants_.multi_channel_render_support;
+    int render_processing_num_channels =
+        multi_channel_render
+            ? formats_.api_format.reverse_input_stream().num_channels()
+            : 1;
+    formats_.render_processing_format =
+        StreamConfig(render_processing_rate, render_processing_num_channels);
+  } else {
+    formats_.render_processing_format = StreamConfig(
+        formats_.api_format.reverse_input_stream().sample_rate_hz(),
+        formats_.api_format.reverse_input_stream().num_channels());
+  }
+
+  if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
+          kSampleRate32kHz ||
+      capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
+          kSampleRate48kHz) {
+    capture_nonlocked_.split_rate = kSampleRate16kHz;
+  } else {
+    capture_nonlocked_.split_rate =
+        capture_nonlocked_.capture_processing_format.sample_rate_hz();
+  }
+
+  InitializeLocked();
+}
+
+void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
+  // Run in a single-threaded manner when applying the settings.
+ MutexLock lock_render(&mutex_render_); + MutexLock lock_capture(&mutex_capture_); + + const auto adjusted_config = + AdjustConfig(config, gain_controller2_experiment_params_); + RTC_LOG(LS_INFO) << "AudioProcessing::ApplyConfig: " + << adjusted_config.ToString(); + + const bool pipeline_config_changed = + config_.pipeline.multi_channel_render != + adjusted_config.pipeline.multi_channel_render || + config_.pipeline.multi_channel_capture != + adjusted_config.pipeline.multi_channel_capture || + config_.pipeline.maximum_internal_processing_rate != + adjusted_config.pipeline.maximum_internal_processing_rate || + config_.pipeline.capture_downmix_method != + adjusted_config.pipeline.capture_downmix_method; + + const bool aec_config_changed = + config_.echo_canceller.enabled != + adjusted_config.echo_canceller.enabled || + config_.echo_canceller.mobile_mode != + adjusted_config.echo_canceller.mobile_mode; + + const bool agc1_config_changed = + config_.gain_controller1 != adjusted_config.gain_controller1; + + const bool agc2_config_changed = + config_.gain_controller2 != adjusted_config.gain_controller2; + + const bool ns_config_changed = + config_.noise_suppression.enabled != + adjusted_config.noise_suppression.enabled || + config_.noise_suppression.level != + adjusted_config.noise_suppression.level; + + const bool ts_config_changed = config_.transient_suppression.enabled != + adjusted_config.transient_suppression.enabled; + + const bool pre_amplifier_config_changed = + config_.pre_amplifier.enabled != adjusted_config.pre_amplifier.enabled || + config_.pre_amplifier.fixed_gain_factor != + adjusted_config.pre_amplifier.fixed_gain_factor; + + const bool gain_adjustment_config_changed = + config_.capture_level_adjustment != + adjusted_config.capture_level_adjustment; + + config_ = adjusted_config; + + if (aec_config_changed) { + InitializeEchoController(); + } + + if (ns_config_changed) { + InitializeNoiseSuppressor(); + } + + if (ts_config_changed) { + InitializeTransientSuppressor(); + } + + InitializeHighPassFilter(false); + + if (agc1_config_changed) { + InitializeGainController1(); + } + + const bool config_ok = GainController2::Validate(config_.gain_controller2); + if (!config_ok) { + RTC_LOG(LS_ERROR) + << "Invalid Gain Controller 2 config; using the default config."; + config_.gain_controller2 = AudioProcessing::Config::GainController2(); + } + + if (agc2_config_changed || ts_config_changed) { + // AGC2 also depends on TS because of the possible dependency on the APM VAD + // sub-module. + InitializeGainController2(); + InitializeVoiceActivityDetector(); + } + + if (pre_amplifier_config_changed || gain_adjustment_config_changed) { + InitializeCaptureLevelsAdjuster(); + } + + // Reinitialization must happen after all submodule configuration to avoid + // additional reinitializations on the next capture / render processing call. + if (pipeline_config_changed) { + InitializeLocked(formats_.api_format); + } +} + +void AudioProcessingImpl::OverrideSubmoduleCreationForTesting( + const ApmSubmoduleCreationOverrides& overrides) { + MutexLock lock(&mutex_capture_); + submodule_creation_overrides_ = overrides; +} + +int AudioProcessingImpl::proc_sample_rate_hz() const { + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.capture_processing_format.sample_rate_hz(); +} + +int AudioProcessingImpl::proc_fullband_sample_rate_hz() const { + return capture_.capture_fullband_audio + ? 
capture_.capture_fullband_audio->num_frames() * 100 + : capture_nonlocked_.capture_processing_format.sample_rate_hz(); +} + +int AudioProcessingImpl::proc_split_sample_rate_hz() const { + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.split_rate; +} + +size_t AudioProcessingImpl::num_reverse_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.render_processing_format.num_channels(); +} + +size_t AudioProcessingImpl::num_input_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.api_format.input_stream().num_channels(); +} + +size_t AudioProcessingImpl::num_proc_channels() const { + // Used as callback from submodules, hence locking is not allowed. + const bool multi_channel_capture = config_.pipeline.multi_channel_capture && + constants_.multi_channel_capture_support; + if (capture_nonlocked_.echo_controller_enabled && !multi_channel_capture) { + return 1; + } + return num_output_channels(); +} + +size_t AudioProcessingImpl::num_output_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.api_format.output_stream().num_channels(); +} + +void AudioProcessingImpl::set_output_will_be_muted(bool muted) { + MutexLock lock(&mutex_capture_); + HandleCaptureOutputUsedSetting(!muted); +} + +void AudioProcessingImpl::HandleCaptureOutputUsedSetting( + bool capture_output_used) { + capture_.capture_output_used = + capture_output_used || !constants_.minimize_processing_for_unused_output; + + if (submodules_.agc_manager.get()) { + submodules_.agc_manager->HandleCaptureOutputUsedChange( + capture_.capture_output_used); + } + if (submodules_.echo_controller) { + submodules_.echo_controller->SetCaptureOutputUsage( + capture_.capture_output_used); + } + if (submodules_.noise_suppressor) { + submodules_.noise_suppressor->SetCaptureOutputUsage( + capture_.capture_output_used); + } + if (submodules_.gain_controller2) { + submodules_.gain_controller2->SetCaptureOutputUsed( + capture_.capture_output_used); + } +} + +void AudioProcessingImpl::SetRuntimeSetting(RuntimeSetting setting) { + PostRuntimeSetting(setting); +} + +bool AudioProcessingImpl::PostRuntimeSetting(RuntimeSetting setting) { + switch (setting.type()) { + case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting: + case RuntimeSetting::Type::kPlayoutAudioDeviceChange: + return render_runtime_settings_enqueuer_.Enqueue(setting); + case RuntimeSetting::Type::kCapturePreGain: + case RuntimeSetting::Type::kCapturePostGain: + case RuntimeSetting::Type::kCaptureCompressionGain: + case RuntimeSetting::Type::kCaptureFixedPostGain: + case RuntimeSetting::Type::kCaptureOutputUsed: + return capture_runtime_settings_enqueuer_.Enqueue(setting); + case RuntimeSetting::Type::kPlayoutVolumeChange: { + bool enqueueing_successful; + enqueueing_successful = + capture_runtime_settings_enqueuer_.Enqueue(setting); + enqueueing_successful = + render_runtime_settings_enqueuer_.Enqueue(setting) && + enqueueing_successful; + return enqueueing_successful; + } + case RuntimeSetting::Type::kNotSpecified: + RTC_DCHECK_NOTREACHED(); + return true; + } + // The language allows the enum to have a non-enumerator + // value. Check that this doesn't happen. 
+ RTC_DCHECK_NOTREACHED(); + return true; +} + +AudioProcessingImpl::RuntimeSettingEnqueuer::RuntimeSettingEnqueuer( + SwapQueue* runtime_settings) + : runtime_settings_(*runtime_settings) { + RTC_DCHECK(runtime_settings); +} + +AudioProcessingImpl::RuntimeSettingEnqueuer::~RuntimeSettingEnqueuer() = + default; + +bool AudioProcessingImpl::RuntimeSettingEnqueuer::Enqueue( + RuntimeSetting setting) { + const bool successful_insert = runtime_settings_.Insert(&setting); + + if (!successful_insert) { + RTC_LOG(LS_ERROR) << "Cannot enqueue a new runtime setting."; + } + return successful_insert; +} + +void AudioProcessingImpl::MaybeInitializeCapture( + const StreamConfig& input_config, + const StreamConfig& output_config) { + ProcessingConfig processing_config; + bool reinitialization_required = false; + { + // Acquire the capture lock in order to access api_format. The lock is + // released immediately, as we may need to acquire the render lock as part + // of the conditional reinitialization. + MutexLock lock_capture(&mutex_capture_); + processing_config = formats_.api_format; + reinitialization_required = UpdateActiveSubmoduleStates(); + } + + if (processing_config.input_stream() != input_config) { + reinitialization_required = true; + } + + if (processing_config.output_stream() != output_config) { + reinitialization_required = true; + } + + if (reinitialization_required) { + MutexLock lock_render(&mutex_render_); + MutexLock lock_capture(&mutex_capture_); + // Reread the API format since the render format may have changed. + processing_config = formats_.api_format; + processing_config.input_stream() = input_config; + processing_config.output_stream() = output_config; + InitializeLocked(processing_config); + } +} + +int AudioProcessingImpl::ProcessStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) { + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig"); + DenormalDisabler denormal_disabler; + RETURN_ON_ERR( + HandleUnsupportedAudioFormats(src, input_config, output_config, dest)); + MaybeInitializeCapture(input_config, output_config); + + MutexLock lock_capture(&mutex_capture_); + + if (aec_dump_) { + RecordUnprocessedCaptureStream(src); + } + + capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream()); + if (capture_.capture_fullband_audio) { + capture_.capture_fullband_audio->CopyFrom( + src, formats_.api_format.input_stream()); + } + RETURN_ON_ERR(ProcessCaptureStreamLocked()); + if (capture_.capture_fullband_audio) { + capture_.capture_fullband_audio->CopyTo(formats_.api_format.output_stream(), + dest); + } else { + capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest); + } + + if (aec_dump_) { + RecordProcessedCaptureStream(dest); + } + return kNoError; +} + +void AudioProcessingImpl::HandleCaptureRuntimeSettings() { + RuntimeSetting setting; + int num_settings_processed = 0; + while (capture_runtime_settings_.Remove(&setting)) { + if (aec_dump_) { + aec_dump_->WriteRuntimeSetting(setting); + } + switch (setting.type()) { + case RuntimeSetting::Type::kCapturePreGain: + if (config_.pre_amplifier.enabled || + config_.capture_level_adjustment.enabled) { + float value; + setting.GetFloat(&value); + // If the pre-amplifier is used, apply the new gain to the + // pre-amplifier regardless if the capture level adjustment is + // activated. This approach allows both functionalities to coexist + // until they have been properly merged. 
+          if (config_.pre_amplifier.enabled) {
+            config_.pre_amplifier.fixed_gain_factor = value;
+          } else {
+            config_.capture_level_adjustment.pre_gain_factor = value;
+          }
+
+          // Use both the pre-amplifier and the capture level adjustment gains
+          // as pre-gains.
+          float gain = 1.f;
+          if (config_.pre_amplifier.enabled) {
+            gain *= config_.pre_amplifier.fixed_gain_factor;
+          }
+          if (config_.capture_level_adjustment.enabled) {
+            gain *= config_.capture_level_adjustment.pre_gain_factor;
+          }
+
+          submodules_.capture_levels_adjuster->SetPreGain(gain);
+        }
+        // TODO(bugs.chromium.org/9138): Log setting handling by Aec Dump.
+        break;
+      case RuntimeSetting::Type::kCapturePostGain:
+        if (config_.capture_level_adjustment.enabled) {
+          float value;
+          setting.GetFloat(&value);
+          config_.capture_level_adjustment.post_gain_factor = value;
+          submodules_.capture_levels_adjuster->SetPostGain(
+              config_.capture_level_adjustment.post_gain_factor);
+        }
+        // TODO(bugs.chromium.org/9138): Log setting handling by Aec Dump.
+        break;
+      case RuntimeSetting::Type::kCaptureCompressionGain: {
+        if (!submodules_.agc_manager &&
+            !(submodules_.gain_controller2 &&
+              config_.gain_controller2.input_volume_controller.enabled)) {
+          float value;
+          setting.GetFloat(&value);
+          int int_value = static_cast<int>(value + .5f);
+          config_.gain_controller1.compression_gain_db = int_value;
+          if (submodules_.gain_control) {
+            int error =
+                submodules_.gain_control->set_compression_gain_db(int_value);
+            RTC_DCHECK_EQ(kNoError, error);
+          }
+        }
+        break;
+      }
+      case RuntimeSetting::Type::kCaptureFixedPostGain: {
+        if (submodules_.gain_controller2) {
+          float value;
+          setting.GetFloat(&value);
+          config_.gain_controller2.fixed_digital.gain_db = value;
+          submodules_.gain_controller2->SetFixedGainDb(value);
+        }
+        break;
+      }
+      case RuntimeSetting::Type::kPlayoutVolumeChange: {
+        int value;
+        setting.GetInt(&value);
+        capture_.playout_volume = value;
+        break;
+      }
+      case RuntimeSetting::Type::kPlayoutAudioDeviceChange:
+        RTC_DCHECK_NOTREACHED();
+        break;
+      case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
+        RTC_DCHECK_NOTREACHED();
+        break;
+      case RuntimeSetting::Type::kNotSpecified:
+        RTC_DCHECK_NOTREACHED();
+        break;
+      case RuntimeSetting::Type::kCaptureOutputUsed:
+        bool value;
+        setting.GetBool(&value);
+        HandleCaptureOutputUsedSetting(value);
+        break;
+    }
+    ++num_settings_processed;
+  }
+
+  if (num_settings_processed >= RuntimeSettingQueueSize()) {
+    // Handle overrun of the runtime settings queue, which likely has
+    // caused settings to be discarded.
+    HandleOverrunInCaptureRuntimeSettingsQueue();
+  }
+}
+
+void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() {
+  // Fall back to a safe state for the case when a capture output usage
+  // setting has been missed.
+ HandleCaptureOutputUsedSetting(/*capture_output_used=*/true); +} + +void AudioProcessingImpl::HandleRenderRuntimeSettings() { + RuntimeSetting setting; + while (render_runtime_settings_.Remove(&setting)) { + if (aec_dump_) { + aec_dump_->WriteRuntimeSetting(setting); + } + switch (setting.type()) { + case RuntimeSetting::Type::kPlayoutAudioDeviceChange: // fall-through + case RuntimeSetting::Type::kPlayoutVolumeChange: // fall-through + case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting: + if (submodules_.render_pre_processor) { + submodules_.render_pre_processor->SetRuntimeSetting(setting); + } + break; + case RuntimeSetting::Type::kCapturePreGain: // fall-through + case RuntimeSetting::Type::kCapturePostGain: // fall-through + case RuntimeSetting::Type::kCaptureCompressionGain: // fall-through + case RuntimeSetting::Type::kCaptureFixedPostGain: // fall-through + case RuntimeSetting::Type::kCaptureOutputUsed: // fall-through + case RuntimeSetting::Type::kNotSpecified: + RTC_DCHECK_NOTREACHED(); + break; + } + } +} + +void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) { + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + + if (submodules_.echo_control_mobile) { + EchoControlMobileImpl::PackRenderAudioBuffer(audio, num_output_channels(), + num_reverse_channels(), + &aecm_render_queue_buffer_); + RTC_DCHECK(aecm_render_signal_queue_); + // Insert the samples into the queue. + if (!aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + EmptyQueuedRenderAudio(); + + // Retry the insert (should always work). + bool result = + aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_); + RTC_DCHECK(result); + } + } + + if (!submodules_.agc_manager && submodules_.gain_control) { + GainControlImpl::PackRenderAudioBuffer(*audio, &agc_render_queue_buffer_); + // Insert the samples into the queue. + if (!agc_render_signal_queue_->Insert(&agc_render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + EmptyQueuedRenderAudio(); + + // Retry the insert (should always work). + bool result = agc_render_signal_queue_->Insert(&agc_render_queue_buffer_); + RTC_DCHECK(result); + } + } +} + +void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) { + if (submodules_.echo_detector) { + PackRenderAudioBufferForEchoDetector(*audio, red_render_queue_buffer_); + RTC_DCHECK(red_render_signal_queue_); + // Insert the samples into the queue. + if (!red_render_signal_queue_->Insert(&red_render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + EmptyQueuedRenderAudio(); + + // Retry the insert (should always work). + bool result = red_render_signal_queue_->Insert(&red_render_queue_buffer_); + RTC_DCHECK(result); + } + } +} + +void AudioProcessingImpl::AllocateRenderQueue() { + const size_t new_agc_render_queue_element_max_size = + std::max(static_cast(1), kMaxAllowedValuesOfSamplesPerBand); + + const size_t new_red_render_queue_element_max_size = + std::max(static_cast(1), kMaxAllowedValuesOfSamplesPerFrame); + + // Reallocate the queues if the queue item sizes are too small to fit the + // data to put in the queues. 
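The queueing helpers above share one idiom: a failed `Insert()` into a full SwapQueue triggers a drain on the consumer side, after which the retry must succeed. A minimal sketch of that idiom, assuming the `SwapQueue` API from `rtc_base/swap_queue.h`; the `drain_queue` callback stands in for `EmptyQueuedRenderAudio()` and the float payload is illustrative:

```cpp
#include <vector>

#include "rtc_base/checks.h"
#include "rtc_base/swap_queue.h"

// Inserts `item` into `queue`; if the queue is full, drains it once and
// retries.
void InsertWithDrainRetry(webrtc::SwapQueue<std::vector<float>>* queue,
                          std::vector<float>* item,
                          void (*drain_queue)()) {
  if (!queue->Insert(item)) {
    // The queue is full: empty it on the consumer side, then retry. With a
    // single producer, the second Insert() should always succeed.
    drain_queue();
    const bool ok = queue->Insert(item);
    RTC_DCHECK(ok);
  }
}
```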
+
+  if (agc_render_queue_element_max_size_ <
+      new_agc_render_queue_element_max_size) {
+    agc_render_queue_element_max_size_ = new_agc_render_queue_element_max_size;
+
+    std::vector<int16_t> template_queue_element(
+        agc_render_queue_element_max_size_);
+
+    agc_render_signal_queue_.reset(
+        new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
+            kMaxNumFramesToBuffer, template_queue_element,
+            RenderQueueItemVerifier<int16_t>(
+                agc_render_queue_element_max_size_)));
+
+    agc_render_queue_buffer_.resize(agc_render_queue_element_max_size_);
+    agc_capture_queue_buffer_.resize(agc_render_queue_element_max_size_);
+  } else {
+    agc_render_signal_queue_->Clear();
+  }
+
+  if (submodules_.echo_detector) {
+    if (red_render_queue_element_max_size_ <
+        new_red_render_queue_element_max_size) {
+      red_render_queue_element_max_size_ =
+          new_red_render_queue_element_max_size;
+
+      std::vector<float> template_queue_element(
+          red_render_queue_element_max_size_);
+
+      red_render_signal_queue_.reset(
+          new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>(
+              kMaxNumFramesToBuffer, template_queue_element,
+              RenderQueueItemVerifier<float>(
+                  red_render_queue_element_max_size_)));
+
+      red_render_queue_buffer_.resize(red_render_queue_element_max_size_);
+      red_capture_queue_buffer_.resize(red_render_queue_element_max_size_);
+    } else {
+      red_render_signal_queue_->Clear();
+    }
+  }
+}
+
+void AudioProcessingImpl::EmptyQueuedRenderAudio() {
+  MutexLock lock_capture(&mutex_capture_);
+  EmptyQueuedRenderAudioLocked();
+}
+
+void AudioProcessingImpl::EmptyQueuedRenderAudioLocked() {
+  if (submodules_.echo_control_mobile) {
+    RTC_DCHECK(aecm_render_signal_queue_);
+    while (aecm_render_signal_queue_->Remove(&aecm_capture_queue_buffer_)) {
+      submodules_.echo_control_mobile->ProcessRenderAudio(
+          aecm_capture_queue_buffer_);
+    }
+  }
+
+  if (submodules_.gain_control) {
+    while (agc_render_signal_queue_->Remove(&agc_capture_queue_buffer_)) {
+      submodules_.gain_control->ProcessRenderAudio(agc_capture_queue_buffer_);
+    }
+  }
+
+  if (submodules_.echo_detector) {
+    while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) {
+      submodules_.echo_detector->AnalyzeRenderAudio(red_capture_queue_buffer_);
+    }
+  }
+}
+
+int AudioProcessingImpl::ProcessStream(const int16_t* const src,
+                                       const StreamConfig& input_config,
+                                       const StreamConfig& output_config,
+                                       int16_t* const dest) {
+  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
+
+  RETURN_ON_ERR(
+      HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
+  MaybeInitializeCapture(input_config, output_config);
+
+  MutexLock lock_capture(&mutex_capture_);
+  DenormalDisabler denormal_disabler;
+
+  if (aec_dump_) {
+    RecordUnprocessedCaptureStream(src, input_config);
+  }
+
+  capture_.capture_audio->CopyFrom(src, input_config);
+  if (capture_.capture_fullband_audio) {
+    capture_.capture_fullband_audio->CopyFrom(src, input_config);
+  }
+  RETURN_ON_ERR(ProcessCaptureStreamLocked());
+  if (submodule_states_.CaptureMultiBandProcessingPresent() ||
+      submodule_states_.CaptureFullBandProcessingActive()) {
+    if (capture_.capture_fullband_audio) {
+      capture_.capture_fullband_audio->CopyTo(output_config, dest);
+    } else {
+      capture_.capture_audio->CopyTo(output_config, dest);
+    }
+  }
+
+  if (aec_dump_) {
+    RecordProcessedCaptureStream(dest, output_config);
+  }
+  return kNoError;
+}
+
+int AudioProcessingImpl::ProcessCaptureStreamLocked() {
+  EmptyQueuedRenderAudioLocked();
+  HandleCaptureRuntimeSettings();
+  DenormalDisabler denormal_disabler;
+
+  // Ensure that not both the AEC and AECM are active at the same time.
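The `RTC_DCHECK_LE` just below counts active cancellers by collapsing each submodule pointer to 0 or 1 with `!!`, so the sum may be at most one. The same pattern in isolation (the parameter types are placeholders):

```cpp
#include <memory>

#include "rtc_base/checks.h"

// At most one of the two echo cancellers may be instantiated at a time.
template <typename A, typename B>
void DcheckMutuallyExclusive(const std::unique_ptr<A>& aec,
                             const std::unique_ptr<B>& aecm) {
  RTC_DCHECK_LE(!!aec + !!aecm, 1);
}
```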
+ // TODO(peah): Simplify once the public API Enable functions for these + // are moved to APM. + RTC_DCHECK_LE( + !!submodules_.echo_controller + !!submodules_.echo_control_mobile, 1); + + data_dumper_->DumpRaw( + "applied_input_volume", + capture_.applied_input_volume.value_or(kUnspecifiedDataDumpInputVolume)); + + AudioBuffer* capture_buffer = capture_.capture_audio.get(); // For brevity. + AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get(); + + if (submodules_.high_pass_filter && + config_.high_pass_filter.apply_in_full_band && + !constants_.enforce_split_band_hpf) { + submodules_.high_pass_filter->Process(capture_buffer, + /*use_split_band_data=*/false); + } + + if (submodules_.capture_levels_adjuster) { + if (config_.capture_level_adjustment.analog_mic_gain_emulation.enabled) { + // When the input volume is emulated, retrieve the volume applied to the + // input audio and notify that to APM so that the volume is passed to the + // active AGC. + set_stream_analog_level_locked( + submodules_.capture_levels_adjuster->GetAnalogMicGainLevel()); + } + submodules_.capture_levels_adjuster->ApplyPreLevelAdjustment( + *capture_buffer); + } + + capture_input_rms_.Analyze(rtc::ArrayView( + capture_buffer->channels_const()[0], + capture_nonlocked_.capture_processing_format.num_frames())); + const bool log_rms = ++capture_rms_interval_counter_ >= 1000; + if (log_rms) { + capture_rms_interval_counter_ = 0; + RmsLevel::Levels levels = capture_input_rms_.AverageAndPeak(); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelAverageRms", + levels.average, 1, RmsLevel::kMinLevelDb, 64); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelPeakRms", + levels.peak, 1, RmsLevel::kMinLevelDb, 64); + } + + if (capture_.applied_input_volume.has_value()) { + applied_input_volume_stats_reporter_.UpdateStatistics( + *capture_.applied_input_volume); + } + + if (submodules_.echo_controller) { + // Determine if the echo path gain has changed by checking all the gains + // applied before AEC. + capture_.echo_path_gain_change = capture_.applied_input_volume_changed; + + // Detect and flag any change in the capture level adjustment pre-gain. + if (submodules_.capture_levels_adjuster) { + float pre_adjustment_gain = + submodules_.capture_levels_adjuster->GetPreAdjustmentGain(); + capture_.echo_path_gain_change = + capture_.echo_path_gain_change || + (capture_.prev_pre_adjustment_gain != pre_adjustment_gain && + capture_.prev_pre_adjustment_gain >= 0.0f); + capture_.prev_pre_adjustment_gain = pre_adjustment_gain; + } + + // Detect volume change. + capture_.echo_path_gain_change = + capture_.echo_path_gain_change || + (capture_.prev_playout_volume != capture_.playout_volume && + capture_.prev_playout_volume >= 0); + capture_.prev_playout_volume = capture_.playout_volume; + + submodules_.echo_controller->AnalyzeCapture(capture_buffer); + } + + if (submodules_.agc_manager) { + submodules_.agc_manager->AnalyzePreProcess(*capture_buffer); + } + + if (submodules_.gain_controller2 && + config_.gain_controller2.input_volume_controller.enabled) { + // Expect the volume to be available if the input controller is enabled. 
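The echo-path-gain tracking above folds several change detectors into one flag: each gain stage contributes `changed = previous >= 0 && previous != current`, with a negative `previous` meaning "not yet observed" so the first frame never registers as a change. A standalone sketch of one such stage (the function name is illustrative):

```cpp
// Returns the accumulated change flag after folding in one gain stage.
// A negative `*prev_gain` marks "not yet observed".
bool AccumulateGainChange(float current_gain,
                          float* prev_gain,
                          bool change_so_far) {
  const bool changed = *prev_gain >= 0.0f && *prev_gain != current_gain;
  *prev_gain = current_gain;
  return change_so_far || changed;
}
```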
+ RTC_DCHECK(capture_.applied_input_volume.has_value()); + if (capture_.applied_input_volume.has_value()) { + submodules_.gain_controller2->Analyze(*capture_.applied_input_volume, + *capture_buffer); + } + } + + if (submodule_states_.CaptureMultiBandSubModulesActive() && + SampleRateSupportsMultiBand( + capture_nonlocked_.capture_processing_format.sample_rate_hz())) { + capture_buffer->SplitIntoFrequencyBands(); + } + + const bool multi_channel_capture = config_.pipeline.multi_channel_capture && + constants_.multi_channel_capture_support; + if (submodules_.echo_controller && !multi_channel_capture) { + // Force down-mixing of the number of channels after the detection of + // capture signal saturation. + // TODO(peah): Look into ensuring that this kind of tampering with the + // AudioBuffer functionality should not be needed. + capture_buffer->set_num_channels(1); + } + + if (submodules_.high_pass_filter && + (!config_.high_pass_filter.apply_in_full_band || + constants_.enforce_split_band_hpf)) { + submodules_.high_pass_filter->Process(capture_buffer, + /*use_split_band_data=*/true); + } + + if (submodules_.gain_control) { + RETURN_ON_ERR( + submodules_.gain_control->AnalyzeCaptureAudio(*capture_buffer)); + } + + if ((!config_.noise_suppression.analyze_linear_aec_output_when_available || + !linear_aec_buffer || submodules_.echo_control_mobile) && + submodules_.noise_suppressor) { + submodules_.noise_suppressor->Analyze(*capture_buffer); + } + + if (submodules_.echo_control_mobile) { + // Ensure that the stream delay was set before the call to the + // AECM ProcessCaptureAudio function. + if (!capture_.was_stream_delay_set) { + return AudioProcessing::kStreamParameterNotSetError; + } + + if (submodules_.noise_suppressor) { + submodules_.noise_suppressor->Process(capture_buffer); + } + + RETURN_ON_ERR(submodules_.echo_control_mobile->ProcessCaptureAudio( + capture_buffer, stream_delay_ms())); + } else { + if (submodules_.echo_controller) { + data_dumper_->DumpRaw("stream_delay", stream_delay_ms()); + + if (capture_.was_stream_delay_set) { + submodules_.echo_controller->SetAudioBufferDelay(stream_delay_ms()); + } + + submodules_.echo_controller->ProcessCapture( + capture_buffer, linear_aec_buffer, capture_.echo_path_gain_change); + } + + if (config_.noise_suppression.analyze_linear_aec_output_when_available && + linear_aec_buffer && submodules_.noise_suppressor) { + submodules_.noise_suppressor->Analyze(*linear_aec_buffer); + } + + if (submodules_.noise_suppressor) { + submodules_.noise_suppressor->Process(capture_buffer); + } + } + + if (submodules_.agc_manager) { + submodules_.agc_manager->Process(*capture_buffer); + + absl::optional new_digital_gain = + submodules_.agc_manager->GetDigitalComressionGain(); + if (new_digital_gain && submodules_.gain_control) { + submodules_.gain_control->set_compression_gain_db(*new_digital_gain); + } + } + + if (submodules_.gain_control) { + // TODO(peah): Add reporting from AEC3 whether there is echo. + RETURN_ON_ERR(submodules_.gain_control->ProcessCaptureAudio( + capture_buffer, /*stream_has_echo*/ false)); + } + + if (submodule_states_.CaptureMultiBandProcessingPresent() && + SampleRateSupportsMultiBand( + capture_nonlocked_.capture_processing_format.sample_rate_hz())) { + capture_buffer->MergeFrequencyBands(); + } + + if (capture_.capture_output_used) { + if (capture_.capture_fullband_audio) { + const auto& ec = submodules_.echo_controller; + bool ec_active = ec ? 
ec->ActiveProcessing() : false; + // Only update the fullband buffer if the multiband processing has changed + // the signal. Keep the original signal otherwise. + if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) { + capture_buffer->CopyTo(capture_.capture_fullband_audio.get()); + } + capture_buffer = capture_.capture_fullband_audio.get(); + } + + if (submodules_.echo_detector) { + submodules_.echo_detector->AnalyzeCaptureAudio( + rtc::ArrayView(capture_buffer->channels()[0], + capture_buffer->num_frames())); + } + + absl::optional voice_probability; + if (!!submodules_.voice_activity_detector) { + voice_probability = submodules_.voice_activity_detector->Analyze( + AudioFrameView(capture_buffer->channels(), + capture_buffer->num_channels(), + capture_buffer->num_frames())); + } + + if (submodules_.transient_suppressor) { + float transient_suppressor_voice_probability = 1.0f; + switch (transient_suppressor_vad_mode_) { + case TransientSuppressor::VadMode::kDefault: + if (submodules_.agc_manager) { + transient_suppressor_voice_probability = + submodules_.agc_manager->voice_probability(); + } + break; + case TransientSuppressor::VadMode::kRnnVad: + RTC_DCHECK(voice_probability.has_value()); + transient_suppressor_voice_probability = *voice_probability; + break; + case TransientSuppressor::VadMode::kNoVad: + // The transient suppressor will ignore `voice_probability`. + break; + } + float delayed_voice_probability = + submodules_.transient_suppressor->Suppress( + capture_buffer->channels()[0], capture_buffer->num_frames(), + capture_buffer->num_channels(), + capture_buffer->split_bands_const(0)[kBand0To8kHz], + capture_buffer->num_frames_per_band(), + /*reference_data=*/nullptr, /*reference_length=*/0, + transient_suppressor_voice_probability, capture_.key_pressed); + if (voice_probability.has_value()) { + *voice_probability = delayed_voice_probability; + } + } + + // Experimental APM sub-module that analyzes `capture_buffer`. + if (submodules_.capture_analyzer) { + submodules_.capture_analyzer->Analyze(capture_buffer); + } + + if (submodules_.gain_controller2) { + // TODO(bugs.webrtc.org/7494): Let AGC2 detect applied input volume + // changes. + submodules_.gain_controller2->Process( + voice_probability, capture_.applied_input_volume_changed, + capture_buffer); + } + + if (submodules_.capture_post_processor) { + submodules_.capture_post_processor->Process(capture_buffer); + } + + capture_output_rms_.Analyze(rtc::ArrayView( + capture_buffer->channels_const()[0], + capture_nonlocked_.capture_processing_format.num_frames())); + if (log_rms) { + RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak(); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1, + RmsLevel::kMinLevelDb, 64); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms", + levels.peak, 1, RmsLevel::kMinLevelDb, 64); + } + + // Compute echo-detector stats. + if (submodules_.echo_detector) { + auto ed_metrics = submodules_.echo_detector->GetMetrics(); + capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood; + capture_.stats.residual_echo_likelihood_recent_max = + ed_metrics.echo_likelihood_recent_max; + } + } + + // Compute echo-controller stats. 
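The transient-suppressor branch above picks its voice probability from one of three sources depending on the VAD mode. A condensed sketch of that selection; the enum mirrors `TransientSuppressor::VadMode` and the parameter names are illustrative:

```cpp
#include "absl/types/optional.h"

enum class VadMode { kDefault, kRnnVad, kNoVad };

float SelectVoiceProbability(VadMode mode,
                             float agc1_probability,
                             absl::optional<float> rnn_probability) {
  switch (mode) {
    case VadMode::kDefault:
      return agc1_probability;  // From the AGC1 manager, when present.
    case VadMode::kRnnVad:
      return rnn_probability.value_or(1.0f);  // From the APM VAD sub-module.
    case VadMode::kNoVad:
      return 1.0f;  // The suppressor ignores the probability in this mode.
  }
  return 1.0f;
}
```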
+ if (submodules_.echo_controller) { + auto ec_metrics = submodules_.echo_controller->GetMetrics(); + capture_.stats.echo_return_loss = ec_metrics.echo_return_loss; + capture_.stats.echo_return_loss_enhancement = + ec_metrics.echo_return_loss_enhancement; + capture_.stats.delay_ms = ec_metrics.delay_ms; + } + + // Pass stats for reporting. + stats_reporter_.UpdateStatistics(capture_.stats); + + UpdateRecommendedInputVolumeLocked(); + if (capture_.recommended_input_volume.has_value()) { + recommended_input_volume_stats_reporter_.UpdateStatistics( + *capture_.recommended_input_volume); + } + + if (submodules_.capture_levels_adjuster) { + submodules_.capture_levels_adjuster->ApplyPostLevelAdjustment( + *capture_buffer); + + if (config_.capture_level_adjustment.analog_mic_gain_emulation.enabled) { + // If the input volume emulation is used, retrieve the recommended input + // volume and set that to emulate the input volume on the next processed + // audio frame. + RTC_DCHECK(capture_.recommended_input_volume.has_value()); + submodules_.capture_levels_adjuster->SetAnalogMicGainLevel( + *capture_.recommended_input_volume); + } + } + + // Temporarily set the output to zero after the stream has been unmuted + // (capture output is again used). The purpose of this is to avoid clicks and + // artefacts in the audio that results when the processing again is + // reactivated after unmuting. + if (!capture_.capture_output_used_last_frame && + capture_.capture_output_used) { + for (size_t ch = 0; ch < capture_buffer->num_channels(); ++ch) { + rtc::ArrayView channel_view(capture_buffer->channels()[ch], + capture_buffer->num_frames()); + std::fill(channel_view.begin(), channel_view.end(), 0.f); + } + } + capture_.capture_output_used_last_frame = capture_.capture_output_used; + + capture_.was_stream_delay_set = false; + + data_dumper_->DumpRaw("recommended_input_volume", + capture_.recommended_input_volume.value_or( + kUnspecifiedDataDumpInputVolume)); + + return kNoError; +} + +int AudioProcessingImpl::AnalyzeReverseStream( + const float* const* data, + const StreamConfig& reverse_config) { + TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_StreamConfig"); + MutexLock lock(&mutex_render_); + DenormalDisabler denormal_disabler; + RTC_DCHECK(data); + for (size_t i = 0; i < reverse_config.num_channels(); ++i) { + RTC_DCHECK(data[i]); + } + RETURN_ON_ERR( + AudioFormatValidityToErrorCode(ValidateAudioFormat(reverse_config))); + + MaybeInitializeRender(reverse_config, reverse_config); + return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config); +} + +int AudioProcessingImpl::ProcessReverseStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) { + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig"); + MutexLock lock(&mutex_render_); + DenormalDisabler denormal_disabler; + RETURN_ON_ERR( + HandleUnsupportedAudioFormats(src, input_config, output_config, dest)); + + MaybeInitializeRender(input_config, output_config); + + RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config)); + + if (submodule_states_.RenderMultiBandProcessingActive() || + submodule_states_.RenderFullBandProcessingActive()) { + render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(), + dest); + } else if (formats_.api_format.reverse_input_stream() != + formats_.api_format.reverse_output_stream()) { + render_.render_converter->Convert(src, input_config.num_samples(), dest, + 
output_config.num_samples()); + } else { + CopyAudioIfNeeded(src, input_config.num_frames(), + input_config.num_channels(), dest); + } + + return kNoError; +} + +int AudioProcessingImpl::AnalyzeReverseStreamLocked( + const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config) { + if (aec_dump_) { + const size_t channel_size = + formats_.api_format.reverse_input_stream().num_frames(); + const size_t num_channels = + formats_.api_format.reverse_input_stream().num_channels(); + aec_dump_->WriteRenderStreamMessage( + AudioFrameView(src, num_channels, channel_size)); + } + render_.render_audio->CopyFrom(src, + formats_.api_format.reverse_input_stream()); + return ProcessRenderStreamLocked(); +} + +int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src, + const StreamConfig& input_config, + const StreamConfig& output_config, + int16_t* const dest) { + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame"); + + MutexLock lock(&mutex_render_); + DenormalDisabler denormal_disabler; + + RETURN_ON_ERR( + HandleUnsupportedAudioFormats(src, input_config, output_config, dest)); + MaybeInitializeRender(input_config, output_config); + + if (aec_dump_) { + aec_dump_->WriteRenderStreamMessage(src, input_config.num_frames(), + input_config.num_channels()); + } + + render_.render_audio->CopyFrom(src, input_config); + RETURN_ON_ERR(ProcessRenderStreamLocked()); + if (submodule_states_.RenderMultiBandProcessingActive() || + submodule_states_.RenderFullBandProcessingActive()) { + render_.render_audio->CopyTo(output_config, dest); + } + return kNoError; +} + +int AudioProcessingImpl::ProcessRenderStreamLocked() { + AudioBuffer* render_buffer = render_.render_audio.get(); // For brevity. + + HandleRenderRuntimeSettings(); + DenormalDisabler denormal_disabler; + + if (submodules_.render_pre_processor) { + submodules_.render_pre_processor->Process(render_buffer); + } + + QueueNonbandedRenderAudio(render_buffer); + + if (submodule_states_.RenderMultiBandSubModulesActive() && + SampleRateSupportsMultiBand( + formats_.render_processing_format.sample_rate_hz())) { + render_buffer->SplitIntoFrequencyBands(); + } + + if (submodule_states_.RenderMultiBandSubModulesActive()) { + QueueBandedRenderAudio(render_buffer); + } + + // TODO(peah): Perform the queuing inside QueueRenderAudiuo(). + if (submodules_.echo_controller) { + submodules_.echo_controller->AnalyzeRender(render_buffer); + } + + if (submodule_states_.RenderMultiBandProcessingActive() && + SampleRateSupportsMultiBand( + formats_.render_processing_format.sample_rate_hz())) { + render_buffer->MergeFrequencyBands(); + } + + return kNoError; +} + +int AudioProcessingImpl::set_stream_delay_ms(int delay) { + MutexLock lock(&mutex_capture_); + Error retval = kNoError; + capture_.was_stream_delay_set = true; + + if (delay < 0) { + delay = 0; + retval = kBadStreamParameterWarning; + } + + // TODO(ajm): the max is rather arbitrarily chosen; investigate. 
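`set_stream_delay_ms()` saturates out-of-range delays instead of failing: the lower bound is clamped above and the upper bound just below, each reported as `kBadStreamParameterWarning`, so a bad delay estimate degrades gracefully. The same clamp as a pure function (the helper name is illustrative; 500 ms is the bound used here):

```cpp
// Clamps `delay_ms` to [0, 500] and reports whether clamping occurred.
int ClampStreamDelayMs(int delay_ms, bool* clamped) {
  constexpr int kMinDelayMs = 0;
  constexpr int kMaxDelayMs = 500;
  *clamped = delay_ms < kMinDelayMs || delay_ms > kMaxDelayMs;
  if (delay_ms < kMinDelayMs) return kMinDelayMs;
  if (delay_ms > kMaxDelayMs) return kMaxDelayMs;
  return delay_ms;
}
```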
+ if (delay > 500) { + delay = 500; + retval = kBadStreamParameterWarning; + } + + capture_nonlocked_.stream_delay_ms = delay; + return retval; +} + +bool AudioProcessingImpl::GetLinearAecOutput( + rtc::ArrayView> linear_output) const { + MutexLock lock(&mutex_capture_); + AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get(); + + RTC_DCHECK(linear_aec_buffer); + if (linear_aec_buffer) { + RTC_DCHECK_EQ(1, linear_aec_buffer->num_bands()); + RTC_DCHECK_EQ(linear_output.size(), linear_aec_buffer->num_channels()); + + for (size_t ch = 0; ch < linear_aec_buffer->num_channels(); ++ch) { + RTC_DCHECK_EQ(linear_output[ch].size(), linear_aec_buffer->num_frames()); + rtc::ArrayView channel_view = + rtc::ArrayView(linear_aec_buffer->channels_const()[ch], + linear_aec_buffer->num_frames()); + FloatS16ToFloat(channel_view.data(), channel_view.size(), + linear_output[ch].data()); + } + return true; + } + RTC_LOG(LS_ERROR) << "No linear AEC output available"; + RTC_DCHECK_NOTREACHED(); + return false; +} + +int AudioProcessingImpl::stream_delay_ms() const { + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.stream_delay_ms; +} + +void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) { + MutexLock lock(&mutex_capture_); + capture_.key_pressed = key_pressed; +} + +void AudioProcessingImpl::set_stream_analog_level(int level) { + MutexLock lock_capture(&mutex_capture_); + set_stream_analog_level_locked(level); +} + +void AudioProcessingImpl::set_stream_analog_level_locked(int level) { + capture_.applied_input_volume_changed = + capture_.applied_input_volume.has_value() && + *capture_.applied_input_volume != level; + capture_.applied_input_volume = level; + + // Invalidate any previously recommended input volume which will be updated by + // `ProcessStream()`. + capture_.recommended_input_volume = absl::nullopt; + + if (submodules_.agc_manager) { + submodules_.agc_manager->set_stream_analog_level(level); + return; + } + + if (submodules_.gain_control) { + int error = submodules_.gain_control->set_stream_analog_level(level); + RTC_DCHECK_EQ(kNoError, error); + return; + } +} + +int AudioProcessingImpl::recommended_stream_analog_level() const { + MutexLock lock_capture(&mutex_capture_); + if (!capture_.applied_input_volume.has_value()) { + RTC_LOG(LS_ERROR) << "set_stream_analog_level has not been called"; + } + // Input volume to recommend when `set_stream_analog_level()` is not called. + constexpr int kFallBackInputVolume = 255; + // When APM has no input volume to recommend, return the latest applied input + // volume that has been observed in order to possibly produce no input volume + // change. If no applied input volume has been observed, return a fall-back + // value. + return capture_.recommended_input_volume.value_or( + capture_.applied_input_volume.value_or(kFallBackInputVolume)); +} + +void AudioProcessingImpl::UpdateRecommendedInputVolumeLocked() { + if (!capture_.applied_input_volume.has_value()) { + // When `set_stream_analog_level()` is not called, no input level can be + // recommended. 
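`recommended_stream_analog_level()` above resolves the returned volume through a strict precedence: APM's own recommendation, else the last applied volume, else a fixed maximum. The nested `value_or()` chain reads naturally as (a minimal restatement, same values as above):

```cpp
#include "absl/types/optional.h"

// Precedence: recommendation > last applied volume > fixed fallback (255).
int ResolveStreamAnalogLevel(absl::optional<int> recommended,
                             absl::optional<int> applied) {
  constexpr int kFallBackInputVolume = 255;
  return recommended.value_or(applied.value_or(kFallBackInputVolume));
}
```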
+ capture_.recommended_input_volume = absl::nullopt; + return; + } + + if (submodules_.agc_manager) { + capture_.recommended_input_volume = + submodules_.agc_manager->recommended_analog_level(); + return; + } + + if (submodules_.gain_control) { + capture_.recommended_input_volume = + submodules_.gain_control->stream_analog_level(); + return; + } + + if (submodules_.gain_controller2 && + config_.gain_controller2.input_volume_controller.enabled) { + capture_.recommended_input_volume = + submodules_.gain_controller2->recommended_input_volume(); + return; + } + + capture_.recommended_input_volume = capture_.applied_input_volume; +} + +bool AudioProcessingImpl::CreateAndAttachAecDump(absl::string_view file_name, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + std::unique_ptr aec_dump = + AecDumpFactory::Create(file_name, max_log_size_bytes, worker_queue); + if (!aec_dump) { + return false; + } + + AttachAecDump(std::move(aec_dump)); + return true; +} + +bool AudioProcessingImpl::CreateAndAttachAecDump(FILE* handle, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + std::unique_ptr aec_dump = + AecDumpFactory::Create(handle, max_log_size_bytes, worker_queue); + if (!aec_dump) { + return false; + } + + AttachAecDump(std::move(aec_dump)); + return true; +} + +void AudioProcessingImpl::AttachAecDump(std::unique_ptr aec_dump) { + RTC_DCHECK(aec_dump); + MutexLock lock_render(&mutex_render_); + MutexLock lock_capture(&mutex_capture_); + + // The previously attached AecDump will be destroyed with the + // 'aec_dump' parameter, which is after locks are released. + aec_dump_.swap(aec_dump); + WriteAecDumpConfigMessage(true); + aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis()); +} + +void AudioProcessingImpl::DetachAecDump() { + // The d-tor of a task-queue based AecDump blocks until all pending + // tasks are done. This construction avoids blocking while holding + // the render and capture locks. + std::unique_ptr aec_dump = nullptr; + { + MutexLock lock_render(&mutex_render_); + MutexLock lock_capture(&mutex_capture_); + aec_dump = std::move(aec_dump_); + } +} + +AudioProcessing::Config AudioProcessingImpl::GetConfig() const { + MutexLock lock_render(&mutex_render_); + MutexLock lock_capture(&mutex_capture_); + return config_; +} + +bool AudioProcessingImpl::UpdateActiveSubmoduleStates() { + return submodule_states_.Update( + config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile, + !!submodules_.noise_suppressor, !!submodules_.gain_control, + !!submodules_.gain_controller2, !!submodules_.voice_activity_detector, + config_.pre_amplifier.enabled || config_.capture_level_adjustment.enabled, + capture_nonlocked_.echo_controller_enabled, + !!submodules_.transient_suppressor); +} + +void AudioProcessingImpl::InitializeTransientSuppressor() { + // Choose the VAD mode for TS and detect a VAD mode change. + const TransientSuppressor::VadMode previous_vad_mode = + transient_suppressor_vad_mode_; + transient_suppressor_vad_mode_ = TransientSuppressor::VadMode::kDefault; + if (UseApmVadSubModule(config_, gain_controller2_experiment_params_)) { + transient_suppressor_vad_mode_ = TransientSuppressor::VadMode::kRnnVad; + } + const bool vad_mode_changed = + previous_vad_mode != transient_suppressor_vad_mode_; + + if (config_.transient_suppression.enabled && + !constants_.transient_suppressor_forced_off) { + // Attempt to create a transient suppressor, if one is not already created. 
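`DetachAecDump()` above uses a move-then-destroy idiom: the dump is moved out of the member while both locks are held, and its potentially blocking destructor runs only after the scope releases them. A minimal sketch of the idiom, with a placeholder guarded type standing in for the AecDump:

```cpp
#include <memory>
#include <utility>

#include "rtc_base/synchronization/mutex.h"

class GuardedHolder {
 public:
  void Detach() {
    std::unique_ptr<int> doomed;  // Placeholder for the AecDump.
    {
      webrtc::MutexLock lock(&mutex_);
      doomed = std::move(guarded_);
    }
    // `doomed` is destroyed here, outside the critical section, so a slow
    // destructor cannot block other threads waiting on `mutex_`.
  }

 private:
  webrtc::Mutex mutex_;
  std::unique_ptr<int> guarded_;
};
```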
+ if (!submodules_.transient_suppressor || vad_mode_changed) { + submodules_.transient_suppressor = CreateTransientSuppressor( + submodule_creation_overrides_, transient_suppressor_vad_mode_, + proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate, + num_proc_channels()); + if (!submodules_.transient_suppressor) { + RTC_LOG(LS_WARNING) + << "No transient suppressor created (probably disabled)"; + } + } else { + submodules_.transient_suppressor->Initialize( + proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate, + num_proc_channels()); + } + } else { + submodules_.transient_suppressor.reset(); + } +} + +void AudioProcessingImpl::InitializeHighPassFilter(bool forced_reset) { + bool high_pass_filter_needed_by_aec = + config_.echo_canceller.enabled && + config_.echo_canceller.enforce_high_pass_filtering && + !config_.echo_canceller.mobile_mode; + if (submodule_states_.HighPassFilteringRequired() || + high_pass_filter_needed_by_aec) { + bool use_full_band = config_.high_pass_filter.apply_in_full_band && + !constants_.enforce_split_band_hpf; + int rate = use_full_band ? proc_fullband_sample_rate_hz() + : proc_split_sample_rate_hz(); + size_t num_channels = + use_full_band ? num_output_channels() : num_proc_channels(); + + if (!submodules_.high_pass_filter || + rate != submodules_.high_pass_filter->sample_rate_hz() || + forced_reset || + num_channels != submodules_.high_pass_filter->num_channels()) { + submodules_.high_pass_filter.reset( + new HighPassFilter(rate, num_channels)); + } + } else { + submodules_.high_pass_filter.reset(); + } +} + +void AudioProcessingImpl::InitializeEchoController() { + bool use_echo_controller = + echo_control_factory_ || + (config_.echo_canceller.enabled && !config_.echo_canceller.mobile_mode); + + if (use_echo_controller) { + // Create and activate the echo controller. + if (echo_control_factory_) { + submodules_.echo_controller = echo_control_factory_->Create( + proc_sample_rate_hz(), num_reverse_channels(), num_proc_channels()); + RTC_DCHECK(submodules_.echo_controller); + } else { + EchoCanceller3Config config; + absl::optional multichannel_config; + if (use_setup_specific_default_aec3_config_) { + multichannel_config = EchoCanceller3::CreateDefaultMultichannelConfig(); + } + submodules_.echo_controller = std::make_unique( + config, multichannel_config, proc_sample_rate_hz(), + num_reverse_channels(), num_proc_channels()); + } + + // Setup the storage for returning the linear AEC output. + if (config_.echo_canceller.export_linear_aec_output) { + constexpr int kLinearOutputRateHz = 16000; + capture_.linear_aec_output = std::make_unique( + kLinearOutputRateHz, num_proc_channels(), kLinearOutputRateHz, + num_proc_channels(), kLinearOutputRateHz, num_proc_channels()); + } else { + capture_.linear_aec_output.reset(); + } + + capture_nonlocked_.echo_controller_enabled = true; + + submodules_.echo_control_mobile.reset(); + aecm_render_signal_queue_.reset(); + return; + } + + submodules_.echo_controller.reset(); + capture_nonlocked_.echo_controller_enabled = false; + capture_.linear_aec_output.reset(); + + if (!config_.echo_canceller.enabled) { + submodules_.echo_control_mobile.reset(); + aecm_render_signal_queue_.reset(); + return; + } + + if (config_.echo_canceller.mobile_mode) { + // Create and activate AECM. 
+    size_t max_element_size =
+        std::max(static_cast<size_t>(1),
+                 kMaxAllowedValuesOfSamplesPerBand *
+                     EchoControlMobileImpl::NumCancellersRequired(
+                         num_output_channels(), num_reverse_channels()));
+
+    std::vector<int16_t> template_queue_element(max_element_size);
+
+    aecm_render_signal_queue_.reset(
+        new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
+            kMaxNumFramesToBuffer, template_queue_element,
+            RenderQueueItemVerifier<int16_t>(max_element_size)));
+
+    aecm_render_queue_buffer_.resize(max_element_size);
+    aecm_capture_queue_buffer_.resize(max_element_size);
+
+    submodules_.echo_control_mobile.reset(new EchoControlMobileImpl());
+
+    submodules_.echo_control_mobile->Initialize(proc_split_sample_rate_hz(),
+                                                num_reverse_channels(),
+                                                num_output_channels());
+    return;
+  }
+
+  submodules_.echo_control_mobile.reset();
+  aecm_render_signal_queue_.reset();
+}
+
+void AudioProcessingImpl::InitializeGainController1() {
+  if (config_.gain_controller2.enabled &&
+      config_.gain_controller2.input_volume_controller.enabled &&
+      config_.gain_controller1.enabled &&
+      (config_.gain_controller1.mode ==
+           AudioProcessing::Config::GainController1::kAdaptiveAnalog ||
+       config_.gain_controller1.analog_gain_controller.enabled)) {
+    RTC_LOG(LS_ERROR) << "APM configuration not valid: "
+                      << "Multiple input volume controllers enabled.";
+  }
+
+  if (!config_.gain_controller1.enabled) {
+    submodules_.agc_manager.reset();
+    submodules_.gain_control.reset();
+    return;
+  }
+
+  RTC_HISTOGRAM_BOOLEAN(
+      "WebRTC.Audio.GainController.Analog.Enabled",
+      config_.gain_controller1.analog_gain_controller.enabled);
+
+  if (!submodules_.gain_control) {
+    submodules_.gain_control.reset(new GainControlImpl());
+  }
+
+  submodules_.gain_control->Initialize(num_proc_channels(),
+                                       proc_sample_rate_hz());
+  if (!config_.gain_controller1.analog_gain_controller.enabled) {
+    int error = submodules_.gain_control->set_mode(
+        Agc1ConfigModeToInterfaceMode(config_.gain_controller1.mode));
+    RTC_DCHECK_EQ(kNoError, error);
+    error = submodules_.gain_control->set_target_level_dbfs(
+        config_.gain_controller1.target_level_dbfs);
+    RTC_DCHECK_EQ(kNoError, error);
+    error = submodules_.gain_control->set_compression_gain_db(
+        config_.gain_controller1.compression_gain_db);
+    RTC_DCHECK_EQ(kNoError, error);
+    error = submodules_.gain_control->enable_limiter(
+        config_.gain_controller1.enable_limiter);
+    RTC_DCHECK_EQ(kNoError, error);
+    constexpr int kAnalogLevelMinimum = 0;
+    constexpr int kAnalogLevelMaximum = 255;
+    error = submodules_.gain_control->set_analog_level_limits(
+        kAnalogLevelMinimum, kAnalogLevelMaximum);
+    RTC_DCHECK_EQ(kNoError, error);
+
+    submodules_.agc_manager.reset();
+    return;
+  }
+
+  if (!submodules_.agc_manager.get() ||
+      submodules_.agc_manager->num_channels() !=
+          static_cast<int>(num_proc_channels())) {
+    int stream_analog_level = -1;
+    const bool re_creation = !!submodules_.agc_manager;
+    if (re_creation) {
+      stream_analog_level =
+          submodules_.agc_manager->recommended_analog_level();
+    }
+    submodules_.agc_manager.reset(new AgcManagerDirect(
+        num_proc_channels(), config_.gain_controller1.analog_gain_controller));
+    if (re_creation) {
+      submodules_.agc_manager->set_stream_analog_level(stream_analog_level);
+    }
+  }
+  submodules_.agc_manager->Initialize();
+  submodules_.agc_manager->SetupDigitalGainControl(*submodules_.gain_control);
+  submodules_.agc_manager->HandleCaptureOutputUsedChange(
+      capture_.capture_output_used);
+}
+
+void AudioProcessingImpl::InitializeGainController2() {
+  if (!config_.gain_controller2.enabled) {
+
submodules_.gain_controller2.reset(); + return; + } + // Override the input volume controller configuration if the AGC2 experiment + // is running and its parameters require to fully switch the gain control to + // AGC2. + const bool input_volume_controller_config_overridden = + gain_controller2_experiment_params_.has_value() && + gain_controller2_experiment_params_->agc2_config.has_value(); + const InputVolumeController::Config input_volume_controller_config = + input_volume_controller_config_overridden + ? gain_controller2_experiment_params_->agc2_config + ->input_volume_controller + : InputVolumeController::Config{}; + // If the APM VAD sub-module is not used, let AGC2 use its internal VAD. + const bool use_internal_vad = + !UseApmVadSubModule(config_, gain_controller2_experiment_params_); + submodules_.gain_controller2 = std::make_unique( + config_.gain_controller2, input_volume_controller_config, + proc_fullband_sample_rate_hz(), num_proc_channels(), use_internal_vad); + submodules_.gain_controller2->SetCaptureOutputUsed( + capture_.capture_output_used); +} + +void AudioProcessingImpl::InitializeVoiceActivityDetector() { + if (!UseApmVadSubModule(config_, gain_controller2_experiment_params_)) { + submodules_.voice_activity_detector.reset(); + return; + } + + if (!submodules_.voice_activity_detector) { + RTC_DCHECK(!!submodules_.gain_controller2); + // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here. + submodules_.voice_activity_detector = + std::make_unique( + submodules_.gain_controller2->GetCpuFeatures(), + proc_fullband_sample_rate_hz()); + } else { + submodules_.voice_activity_detector->Initialize( + proc_fullband_sample_rate_hz()); + } +} + +void AudioProcessingImpl::InitializeNoiseSuppressor() { + submodules_.noise_suppressor.reset(); + + if (config_.noise_suppression.enabled) { + auto map_level = + [](AudioProcessing::Config::NoiseSuppression::Level level) { + using NoiseSuppresionConfig = + AudioProcessing::Config::NoiseSuppression; + switch (level) { + case NoiseSuppresionConfig::kLow: + return NsConfig::SuppressionLevel::k6dB; + case NoiseSuppresionConfig::kModerate: + return NsConfig::SuppressionLevel::k12dB; + case NoiseSuppresionConfig::kHigh: + return NsConfig::SuppressionLevel::k18dB; + case NoiseSuppresionConfig::kVeryHigh: + return NsConfig::SuppressionLevel::k21dB; + } + RTC_CHECK_NOTREACHED(); + }; + + NsConfig cfg; + cfg.target_level = map_level(config_.noise_suppression.level); + submodules_.noise_suppressor = std::make_unique( + cfg, proc_sample_rate_hz(), num_proc_channels()); + } +} + +void AudioProcessingImpl::InitializeCaptureLevelsAdjuster() { + if (config_.pre_amplifier.enabled || + config_.capture_level_adjustment.enabled) { + // Use both the pre-amplifier and the capture level adjustment gains as + // pre-gains. 
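The `map_level` lambda above is a total mapping from the public noise-suppression levels onto fixed attenuation targets. The same table as a standalone function (the enum and helper name are illustrative; the dB values are those used in the lambda):

```cpp
enum class NsLevel { kLow, kModerate, kHigh, kVeryHigh };

// Attenuation target in dB for each public suppression level.
int SuppressionTargetDb(NsLevel level) {
  switch (level) {
    case NsLevel::kLow:
      return 6;
    case NsLevel::kModerate:
      return 12;
    case NsLevel::kHigh:
      return 18;
    case NsLevel::kVeryHigh:
      return 21;
  }
  return 12;  // Unreachable for valid enum values.
}
```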
+ float pre_gain = 1.f; + if (config_.pre_amplifier.enabled) { + pre_gain *= config_.pre_amplifier.fixed_gain_factor; + } + if (config_.capture_level_adjustment.enabled) { + pre_gain *= config_.capture_level_adjustment.pre_gain_factor; + } + + submodules_.capture_levels_adjuster = + std::make_unique( + config_.capture_level_adjustment.analog_mic_gain_emulation.enabled, + config_.capture_level_adjustment.analog_mic_gain_emulation + .initial_level, + pre_gain, config_.capture_level_adjustment.post_gain_factor); + } else { + submodules_.capture_levels_adjuster.reset(); + } +} + +void AudioProcessingImpl::InitializeResidualEchoDetector() { + if (submodules_.echo_detector) { + submodules_.echo_detector->Initialize( + proc_fullband_sample_rate_hz(), 1, + formats_.render_processing_format.sample_rate_hz(), 1); + } +} + +void AudioProcessingImpl::InitializeAnalyzer() { + if (submodules_.capture_analyzer) { + submodules_.capture_analyzer->Initialize(proc_fullband_sample_rate_hz(), + num_proc_channels()); + } +} + +void AudioProcessingImpl::InitializePostProcessor() { + if (submodules_.capture_post_processor) { + submodules_.capture_post_processor->Initialize( + proc_fullband_sample_rate_hz(), num_proc_channels()); + } +} + +void AudioProcessingImpl::InitializePreProcessor() { + if (submodules_.render_pre_processor) { + submodules_.render_pre_processor->Initialize( + formats_.render_processing_format.sample_rate_hz(), + formats_.render_processing_format.num_channels()); + } +} + +void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) { + if (!aec_dump_) { + return; + } + + std::string experiments_description = ""; + // TODO(peah): Add semicolon-separated concatenations of experiment + // descriptions for other submodules. + if (!!submodules_.capture_post_processor) { + experiments_description += "CapturePostProcessor;"; + } + if (!!submodules_.render_pre_processor) { + experiments_description += "RenderPreProcessor;"; + } + if (capture_nonlocked_.echo_controller_enabled) { + experiments_description += "EchoController;"; + } + if (config_.gain_controller2.enabled) { + experiments_description += "GainController2;"; + } + + InternalAPMConfig apm_config; + + apm_config.aec_enabled = config_.echo_canceller.enabled; + apm_config.aec_delay_agnostic_enabled = false; + apm_config.aec_extended_filter_enabled = false; + apm_config.aec_suppression_level = 0; + + apm_config.aecm_enabled = !!submodules_.echo_control_mobile; + apm_config.aecm_comfort_noise_enabled = + submodules_.echo_control_mobile && + submodules_.echo_control_mobile->is_comfort_noise_enabled(); + apm_config.aecm_routing_mode = + submodules_.echo_control_mobile + ? static_cast(submodules_.echo_control_mobile->routing_mode()) + : 0; + + apm_config.agc_enabled = !!submodules_.gain_control; + + apm_config.agc_mode = submodules_.gain_control + ? static_cast(submodules_.gain_control->mode()) + : GainControl::kAdaptiveAnalog; + apm_config.agc_limiter_enabled = + submodules_.gain_control ? 
submodules_.gain_control->is_limiter_enabled() + : false; + apm_config.noise_robust_agc_enabled = !!submodules_.agc_manager; + + apm_config.hpf_enabled = config_.high_pass_filter.enabled; + + apm_config.ns_enabled = config_.noise_suppression.enabled; + apm_config.ns_level = static_cast(config_.noise_suppression.level); + + apm_config.transient_suppression_enabled = + config_.transient_suppression.enabled; + apm_config.experiments_description = experiments_description; + apm_config.pre_amplifier_enabled = config_.pre_amplifier.enabled; + apm_config.pre_amplifier_fixed_gain_factor = + config_.pre_amplifier.fixed_gain_factor; + + if (!forced && apm_config == apm_config_for_aec_dump_) { + return; + } + aec_dump_->WriteConfig(apm_config); + apm_config_for_aec_dump_ = apm_config; +} + +void AudioProcessingImpl::RecordUnprocessedCaptureStream( + const float* const* src) { + RTC_DCHECK(aec_dump_); + WriteAecDumpConfigMessage(false); + + const size_t channel_size = formats_.api_format.input_stream().num_frames(); + const size_t num_channels = formats_.api_format.input_stream().num_channels(); + aec_dump_->AddCaptureStreamInput( + AudioFrameView(src, num_channels, channel_size)); + RecordAudioProcessingState(); +} + +void AudioProcessingImpl::RecordUnprocessedCaptureStream( + const int16_t* const data, + const StreamConfig& config) { + RTC_DCHECK(aec_dump_); + WriteAecDumpConfigMessage(false); + + aec_dump_->AddCaptureStreamInput(data, config.num_channels(), + config.num_frames()); + RecordAudioProcessingState(); +} + +void AudioProcessingImpl::RecordProcessedCaptureStream( + const float* const* processed_capture_stream) { + RTC_DCHECK(aec_dump_); + + const size_t channel_size = formats_.api_format.output_stream().num_frames(); + const size_t num_channels = + formats_.api_format.output_stream().num_channels(); + aec_dump_->AddCaptureStreamOutput(AudioFrameView( + processed_capture_stream, num_channels, channel_size)); + aec_dump_->WriteCaptureStreamMessage(); +} + +void AudioProcessingImpl::RecordProcessedCaptureStream( + const int16_t* const data, + const StreamConfig& config) { + RTC_DCHECK(aec_dump_); + + aec_dump_->AddCaptureStreamOutput(data, config.num_channels(), + config.num_frames()); + aec_dump_->WriteCaptureStreamMessage(); +} + +void AudioProcessingImpl::RecordAudioProcessingState() { + RTC_DCHECK(aec_dump_); + AecDump::AudioProcessingState audio_proc_state; + audio_proc_state.delay = capture_nonlocked_.stream_delay_ms; + audio_proc_state.drift = 0; + audio_proc_state.applied_input_volume = capture_.applied_input_volume; + audio_proc_state.keypress = capture_.key_pressed; + aec_dump_->AddAudioProcessingState(audio_proc_state); +} + +AudioProcessingImpl::ApmCaptureState::ApmCaptureState() + : was_stream_delay_set(false), + capture_output_used(true), + capture_output_used_last_frame(true), + key_pressed(false), + capture_processing_format(kSampleRate16kHz), + split_rate(kSampleRate16kHz), + echo_path_gain_change(false), + prev_pre_adjustment_gain(-1.0f), + playout_volume(-1), + prev_playout_volume(-1), + applied_input_volume_changed(false) {} + +AudioProcessingImpl::ApmCaptureState::~ApmCaptureState() = default; + +AudioProcessingImpl::ApmRenderState::ApmRenderState() = default; + +AudioProcessingImpl::ApmRenderState::~ApmRenderState() = default; + +AudioProcessingImpl::ApmStatsReporter::ApmStatsReporter() + : stats_message_queue_(1) {} + +AudioProcessingImpl::ApmStatsReporter::~ApmStatsReporter() = default; + +AudioProcessingStats AudioProcessingImpl::ApmStatsReporter::GetStatistics() { + 
  MutexLock lock_stats(&mutex_stats_);
+  bool new_stats_available = stats_message_queue_.Remove(&cached_stats_);
+  // If the message queue is full, return the cached stats.
+  static_cast<void>(new_stats_available);
+
+  return cached_stats_;
+}
+
+void AudioProcessingImpl::ApmStatsReporter::UpdateStatistics(
+    const AudioProcessingStats& new_stats) {
+  AudioProcessingStats stats_to_queue = new_stats;
+  bool stats_message_passed = stats_message_queue_.Insert(&stats_to_queue);
+  // If the message queue is full, discard the new stats.
+  static_cast<void>(stats_message_passed);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.h b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.h
new file mode 100644
index 0000000000..fe80e0d912
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl.h
@@ -0,0 +1,603 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
+
+#include <stdio.h>
+
+#include <atomic>
+#include <list>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/function_view.h"
+#include "modules/audio_processing/aec3/echo_canceller3.h"
+#include "modules/audio_processing/agc/agc_manager_direct.h"
+#include "modules/audio_processing/agc/gain_control.h"
+#include "modules/audio_processing/agc2/input_volume_stats_reporter.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h"
+#include "modules/audio_processing/echo_control_mobile_impl.h"
+#include "modules/audio_processing/gain_control_impl.h"
+#include "modules/audio_processing/gain_controller2.h"
+#include "modules/audio_processing/high_pass_filter.h"
+#include "modules/audio_processing/include/aec_dump.h"
+#include "modules/audio_processing/include/audio_frame_proxies.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/include/audio_processing_statistics.h"
+#include "modules/audio_processing/ns/noise_suppressor.h"
+#include "modules/audio_processing/optionally_built_submodule_creators.h"
+#include "modules/audio_processing/render_queue_item_verifier.h"
+#include "modules/audio_processing/rms_level.h"
+#include "modules/audio_processing/transient/transient_suppressor.h"
+#include "rtc_base/gtest_prod_util.h"
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/swap_queue.h"
+#include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioConverter;
+
+constexpr int RuntimeSettingQueueSize() {
+  return 100;
+}
+
+class AudioProcessingImpl : public AudioProcessing {
+ public:
+  // Methods forcing APM to run in a single-threaded manner.
+  // Acquires both the render and capture locks.
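`ApmStatsReporter` above moves stats across threads through a `SwapQueue` of capacity one: a full slot silently drops the new snapshot, and an empty slot leaves the reader's cached copy untouched. A reduced sketch of that channel, assuming the `SwapQueue` API from `rtc_base/swap_queue.h`; the `Stats` payload and class name are placeholders:

```cpp
#include "rtc_base/swap_queue.h"

struct Stats {
  int delay_ms = 0;
};

class SingleSlotStatsChannel {
 public:
  SingleSlotStatsChannel() : queue_(1) {}

  // Publisher side: silently drops `s` if the previous snapshot is unread.
  void Publish(Stats s) { queue_.Insert(&s); }

  // Reader side: Remove() leaves `cached_` untouched when the slot is empty,
  // so the last successfully read snapshot is returned again.
  Stats Read() {
    queue_.Remove(&cached_);
    return cached_;
  }

 private:
  webrtc::SwapQueue<Stats> queue_;
  Stats cached_;
};
```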
+  AudioProcessingImpl();
+  AudioProcessingImpl(const AudioProcessing::Config& config,
+                      std::unique_ptr<CustomProcessing> capture_post_processor,
+                      std::unique_ptr<CustomProcessing> render_pre_processor,
+                      std::unique_ptr<EchoControlFactory> echo_control_factory,
+                      rtc::scoped_refptr<EchoDetector> echo_detector,
+                      std::unique_ptr<CustomAudioAnalyzer> capture_analyzer);
+  ~AudioProcessingImpl() override;
+  int Initialize() override;
+  int Initialize(const ProcessingConfig& processing_config) override;
+  void ApplyConfig(const AudioProcessing::Config& config) override;
+  bool CreateAndAttachAecDump(absl::string_view file_name,
+                              int64_t max_log_size_bytes,
+                              rtc::TaskQueue* worker_queue) override;
+  bool CreateAndAttachAecDump(FILE* handle,
+                              int64_t max_log_size_bytes,
+                              rtc::TaskQueue* worker_queue) override;
+  // TODO(webrtc:5298) Deprecated variant.
+  void AttachAecDump(std::unique_ptr<AecDump> aec_dump) override;
+  void DetachAecDump() override;
+  void SetRuntimeSetting(RuntimeSetting setting) override;
+  bool PostRuntimeSetting(RuntimeSetting setting) override;
+
+  // Capture-side exclusive methods possibly running APM in a
+  // multi-threaded manner. Acquire the capture lock.
+  int ProcessStream(const int16_t* const src,
+                    const StreamConfig& input_config,
+                    const StreamConfig& output_config,
+                    int16_t* const dest) override;
+  int ProcessStream(const float* const* src,
+                    const StreamConfig& input_config,
+                    const StreamConfig& output_config,
+                    float* const* dest) override;
+  bool GetLinearAecOutput(
+      rtc::ArrayView<std::array<float, 160>> linear_output) const override;
+  void set_output_will_be_muted(bool muted) override;
+  void HandleCaptureOutputUsedSetting(bool capture_output_used)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
+  int set_stream_delay_ms(int delay) override;
+  void set_stream_key_pressed(bool key_pressed) override;
+  void set_stream_analog_level(int level) override;
+  int recommended_stream_analog_level() const
+      RTC_LOCKS_EXCLUDED(mutex_capture_) override;
+
+  // Render-side exclusive methods possibly running APM in a
+  // multi-threaded manner. Acquire the render lock.
+  int ProcessReverseStream(const int16_t* const src,
+                           const StreamConfig& input_config,
+                           const StreamConfig& output_config,
+                           int16_t* const dest) override;
+  int AnalyzeReverseStream(const float* const* data,
+                           const StreamConfig& reverse_config) override;
+  int ProcessReverseStream(const float* const* src,
+                           const StreamConfig& input_config,
+                           const StreamConfig& output_config,
+                           float* const* dest) override;
+
+  // Methods only accessed from APM submodules or
+  // from AudioProcessing tests in a single-threaded manner.
+  // Hence there is no need for locks in these.
+  int proc_sample_rate_hz() const override;
+  int proc_split_sample_rate_hz() const override;
+  size_t num_input_channels() const override;
+  size_t num_proc_channels() const override;
+  size_t num_output_channels() const override;
+  size_t num_reverse_channels() const override;
+  int stream_delay_ms() const override;
+
+  AudioProcessingStats GetStatistics(bool has_remote_tracks) override {
+    return GetStatistics();
+  }
+  AudioProcessingStats GetStatistics() override {
+    return stats_reporter_.GetStatistics();
+  }
+
+  AudioProcessing::Config GetConfig() const override;
+
+ protected:
+  // Overridden in a mock.
+ virtual void InitializeLocked() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_); + void AssertLockedForTest() + RTC_ASSERT_EXCLUSIVE_LOCK(mutex_render_, mutex_capture_) { + mutex_render_.AssertHeld(); + mutex_capture_.AssertHeld(); + } + + private: + // TODO(peah): These friend classes should be removed as soon as the new + // parameter setting scheme allows. + FRIEND_TEST_ALL_PREFIXES(ApmConfiguration, DefaultBehavior); + FRIEND_TEST_ALL_PREFIXES(ApmConfiguration, ValidConfigBehavior); + FRIEND_TEST_ALL_PREFIXES(ApmConfiguration, InValidConfigBehavior); + FRIEND_TEST_ALL_PREFIXES(ApmWithSubmodulesExcludedTest, + ToggleTransientSuppressor); + FRIEND_TEST_ALL_PREFIXES(ApmWithSubmodulesExcludedTest, + ReinitializeTransientSuppressor); + FRIEND_TEST_ALL_PREFIXES(ApmWithSubmodulesExcludedTest, + BitexactWithDisabledModules); + FRIEND_TEST_ALL_PREFIXES( + AudioProcessingImplGainController2FieldTrialParametrizedTest, + ConfigAdjustedWhenExperimentEnabled); + + void set_stream_analog_level_locked(int level) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + void UpdateRecommendedInputVolumeLocked() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + void OverrideSubmoduleCreationForTesting( + const ApmSubmoduleCreationOverrides& overrides); + + // Class providing thread-safe message pipe functionality for + // `runtime_settings_`. + class RuntimeSettingEnqueuer { + public: + explicit RuntimeSettingEnqueuer( + SwapQueue* runtime_settings); + ~RuntimeSettingEnqueuer(); + + // Enqueue setting and return whether the setting was successfully enqueued. + bool Enqueue(RuntimeSetting setting); + + private: + SwapQueue& runtime_settings_; + }; + + const std::unique_ptr data_dumper_; + static std::atomic instance_count_; + const bool use_setup_specific_default_aec3_config_; + + // Parameters for the "GainController2" experiment which determines whether + // the following APM sub-modules are created and, if so, their configurations: + // AGC2 (`gain_controller2`), AGC1 (`gain_control`, `agc_manager`) and TS + // (`transient_suppressor`). + // TODO(bugs.webrtc.org/7494): Remove when the "WebRTC-Audio-GainController2" + // field trial is removed. + struct GainController2ExperimentParams { + struct Agc2Config { + InputVolumeController::Config input_volume_controller; + AudioProcessing::Config::GainController2::AdaptiveDigital + adaptive_digital_controller; + }; + // When `agc2_config` is specified, all gain control switches to AGC2 and + // the configuration is overridden. + absl::optional agc2_config; + // When true, the transient suppressor submodule is never created regardless + // of the APM configuration. + bool disallow_transient_suppressor_usage; + }; + // Specified when the "WebRTC-Audio-GainController2" field trial is specified. + // TODO(bugs.webrtc.org/7494): Remove when the "WebRTC-Audio-GainController2" + // field trial is removed. + const absl::optional + gain_controller2_experiment_params_; + + // Parses the "WebRTC-Audio-GainController2" field trial. If disabled, returns + // an unspecified value. + static absl::optional + GetGainController2ExperimentParams(); + + // When `experiment_params` is specified, returns an APM configuration + // modified according to the experiment parameters. Otherwise returns + // `config`. + static AudioProcessing::Config AdjustConfig( + const AudioProcessing::Config& config, + const absl::optional& experiment_params); + // Returns true if the APM VAD sub-module should be used. 
+ static bool UseApmVadSubModule( + const AudioProcessing::Config& config, + const absl::optional& experiment_params); + + TransientSuppressor::VadMode transient_suppressor_vad_mode_; + + SwapQueue capture_runtime_settings_; + SwapQueue render_runtime_settings_; + + RuntimeSettingEnqueuer capture_runtime_settings_enqueuer_; + RuntimeSettingEnqueuer render_runtime_settings_enqueuer_; + + // EchoControl factory. + const std::unique_ptr echo_control_factory_; + + class SubmoduleStates { + public: + SubmoduleStates(bool capture_post_processor_enabled, + bool render_pre_processor_enabled, + bool capture_analyzer_enabled); + // Updates the submodule state and returns true if it has changed. + bool Update(bool high_pass_filter_enabled, + bool mobile_echo_controller_enabled, + bool noise_suppressor_enabled, + bool adaptive_gain_controller_enabled, + bool gain_controller2_enabled, + bool voice_activity_detector_enabled, + bool gain_adjustment_enabled, + bool echo_controller_enabled, + bool transient_suppressor_enabled); + bool CaptureMultiBandSubModulesActive() const; + bool CaptureMultiBandProcessingPresent() const; + bool CaptureMultiBandProcessingActive(bool ec_processing_active) const; + bool CaptureFullBandProcessingActive() const; + bool CaptureAnalyzerActive() const; + bool RenderMultiBandSubModulesActive() const; + bool RenderFullBandProcessingActive() const; + bool RenderMultiBandProcessingActive() const; + bool HighPassFilteringRequired() const; + + private: + const bool capture_post_processor_enabled_ = false; + const bool render_pre_processor_enabled_ = false; + const bool capture_analyzer_enabled_ = false; + bool high_pass_filter_enabled_ = false; + bool mobile_echo_controller_enabled_ = false; + bool noise_suppressor_enabled_ = false; + bool adaptive_gain_controller_enabled_ = false; + bool voice_activity_detector_enabled_ = false; + bool gain_controller2_enabled_ = false; + bool gain_adjustment_enabled_ = false; + bool echo_controller_enabled_ = false; + bool transient_suppressor_enabled_ = false; + bool first_update_ = true; + }; + + // Methods for modifying the formats struct that is used by both + // the render and capture threads. The check for whether modifications are + // needed is done while holding a single lock only, thereby avoiding that the + // capture thread blocks the render thread. + // Called by render: Holds the render lock when reading the format struct and + // acquires both locks if reinitialization is required. + void MaybeInitializeRender(const StreamConfig& input_config, + const StreamConfig& output_config) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_); + // Called by capture: Acquires and releases the capture lock to read the + // format struct and acquires both locks if reinitialization is needed. + void MaybeInitializeCapture(const StreamConfig& input_config, + const StreamConfig& output_config); + + // Method for updating the state keeping track of the active submodules. + // Returns a bool indicating whether the state has changed. + bool UpdateActiveSubmoduleStates() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + // Methods requiring APM running in a single-threaded manner, requiring both + // the render and capture lock to be acquired. 
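The comment above, and the RTC_ACQUIRED_BEFORE annotation on the mutex members further down, encode a strict discipline: mutex_render_ is always taken before mutex_capture_, and format changes are detected under the cheap per-stream lock before both locks are taken for reinitialization. A simplified sketch of that shape (the real methods hold the stream lock across the check via thread annotations; here the re-check under both locks is elided for brevity, and all names are illustrative):

#include <mutex>

std::mutex g_render_mutex;  // By convention acquired before g_capture_mutex.
std::mutex g_capture_mutex;
int g_sample_rate_hz = 16000;

void MaybeInitializeRenderSketch(int new_rate_hz) {
  bool reinit_needed;
  {
    // Cheap check: only the render lock, so capture is never blocked.
    std::lock_guard<std::mutex> render_lock(g_render_mutex);
    reinit_needed = (new_rate_hz != g_sample_rate_hz);
  }
  if (reinit_needed) {
    // Expensive path: both locks, always in the fixed order.
    std::lock_guard<std::mutex> render_lock(g_render_mutex);
    std::lock_guard<std::mutex> capture_lock(g_capture_mutex);
    g_sample_rate_hz = new_rate_hz;  // Full reinitialization would go here.
  }
}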
+ void InitializeLocked(const ProcessingConfig& config) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_); + void InitializeResidualEchoDetector() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_); + void InitializeEchoController() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_); + + // Initializations of capture-only sub-modules, requiring the capture lock + // already acquired. + void InitializeHighPassFilter(bool forced_reset) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + void InitializeGainController1() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + void InitializeTransientSuppressor() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + // Initializes the `GainController2` sub-module. If the sub-module is enabled, + // recreates it. + void InitializeGainController2() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + // Initializes the `VoiceActivityDetectorWrapper` sub-module. If the + // sub-module is enabled, recreates it. Call `InitializeGainController2()` + // first. + // TODO(bugs.webrtc.org/13663): Remove if TS is removed otherwise remove call + // order requirement - i.e., decouple from `InitializeGainController2()`. + void InitializeVoiceActivityDetector() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + void InitializeNoiseSuppressor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + void InitializeCaptureLevelsAdjuster() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + void InitializePostProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + void InitializeAnalyzer() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + // Initializations of render-only submodules, requiring the render lock + // already acquired. + void InitializePreProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_); + + // Sample rate used for the fullband processing. + int proc_fullband_sample_rate_hz() const + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + // Empties and handles the respective RuntimeSetting queues. + void HandleCaptureRuntimeSettings() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + void HandleRenderRuntimeSettings() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_); + + void EmptyQueuedRenderAudio() RTC_LOCKS_EXCLUDED(mutex_capture_); + void EmptyQueuedRenderAudioLocked() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + void AllocateRenderQueue() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_); + void QueueBandedRenderAudio(AudioBuffer* audio) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_); + void QueueNonbandedRenderAudio(AudioBuffer* audio) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_); + + // Capture-side exclusive methods possibly running APM in a multi-threaded + // manner that are called with the render lock already acquired. + int ProcessCaptureStreamLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + // Render-side exclusive methods possibly running APM in a multi-threaded + // manner that are called with the render lock already acquired. + int AnalyzeReverseStreamLocked(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_); + int ProcessRenderStreamLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_); + + // Collects configuration settings from public and private + // submodules to be saved as an audioproc::Config message on the + // AecDump if it is attached. If not `forced`, only writes the current + // config if it is different from the last saved one; if `forced`, + // writes the config regardless of the last saved. 
+ void WriteAecDumpConfigMessage(bool forced) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + // Notifies attached AecDump of current configuration and capture data. + void RecordUnprocessedCaptureStream(const float* const* capture_stream) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + void RecordUnprocessedCaptureStream(const int16_t* const data, + const StreamConfig& config) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + // Notifies attached AecDump of current configuration and + // processed capture data and issues a capture stream recording + // request. + void RecordProcessedCaptureStream( + const float* const* processed_capture_stream) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + void RecordProcessedCaptureStream(const int16_t* const data, + const StreamConfig& config) + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + // Notifies attached AecDump about current state (delay, drift, etc). + void RecordAudioProcessingState() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + // Ensures that overruns in the capture runtime settings queue is properly + // handled by the code, providing safe-fallbacks to mitigate the implications + // of any settings being missed. + void HandleOverrunInCaptureRuntimeSettingsQueue() + RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); + + // AecDump instance used for optionally logging APM config, input + // and output to file in the AEC-dump format defined in debug.proto. + std::unique_ptr aec_dump_; + + // Hold the last config written with AecDump for avoiding writing + // the same config twice. + InternalAPMConfig apm_config_for_aec_dump_ RTC_GUARDED_BY(mutex_capture_); + + // Critical sections. + mutable Mutex mutex_render_ RTC_ACQUIRED_BEFORE(mutex_capture_); + mutable Mutex mutex_capture_; + + // Struct containing the Config specifying the behavior of APM. + AudioProcessing::Config config_; + + // Overrides for testing the exclusion of some submodules from the build. + ApmSubmoduleCreationOverrides submodule_creation_overrides_ + RTC_GUARDED_BY(mutex_capture_); + + // Class containing information about what submodules are active. + SubmoduleStates submodule_states_; + + // Struct containing the pointers to the submodules. + struct Submodules { + Submodules(std::unique_ptr capture_post_processor, + std::unique_ptr render_pre_processor, + rtc::scoped_refptr echo_detector, + std::unique_ptr capture_analyzer) + : echo_detector(std::move(echo_detector)), + capture_post_processor(std::move(capture_post_processor)), + render_pre_processor(std::move(render_pre_processor)), + capture_analyzer(std::move(capture_analyzer)) {} + // Accessed internally from capture or during initialization. + const rtc::scoped_refptr echo_detector; + const std::unique_ptr capture_post_processor; + const std::unique_ptr render_pre_processor; + const std::unique_ptr capture_analyzer; + std::unique_ptr agc_manager; + std::unique_ptr gain_control; + std::unique_ptr gain_controller2; + std::unique_ptr voice_activity_detector; + std::unique_ptr high_pass_filter; + std::unique_ptr echo_controller; + std::unique_ptr echo_control_mobile; + std::unique_ptr noise_suppressor; + std::unique_ptr transient_suppressor; + std::unique_ptr capture_levels_adjuster; + } submodules_; + + // State that is written to while holding both the render and capture locks + // but can be read without any lock being held. 
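WriteAecDumpConfigMessage(forced), declared just above, writes the serialized config only when it differs from the last one written, unless the caller forces the write. The same contract in isolation, with a string standing in for the audioproc::Config proto message (names are illustrative):

#include <string>

class ConfigWriterSketch {
 public:
  // Returns true if a write happened. Skips the write when the config is
  // unchanged, unless `forced` is set.
  bool MaybeWrite(const std::string& serialized_config, bool forced) {
    if (!forced && serialized_config == last_written_) return false;
    last_written_ = serialized_config;  // A real AecDump would emit it here.
    return true;
  }

 private:
  std::string last_written_;
};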
+ // As this is only accessed internally of APM, and all internal methods in APM + // either are holding the render or capture locks, this construct is safe as + // it is not possible to read the variables while writing them. + struct ApmFormatState { + ApmFormatState() + : // Format of processing streams at input/output call sites. + api_format({{{kSampleRate16kHz, 1}, + {kSampleRate16kHz, 1}, + {kSampleRate16kHz, 1}, + {kSampleRate16kHz, 1}}}), + render_processing_format(kSampleRate16kHz, 1) {} + ProcessingConfig api_format; + StreamConfig render_processing_format; + } formats_; + + // APM constants. + const struct ApmConstants { + ApmConstants(bool multi_channel_render_support, + bool multi_channel_capture_support, + bool enforce_split_band_hpf, + bool minimize_processing_for_unused_output, + bool transient_suppressor_forced_off) + : multi_channel_render_support(multi_channel_render_support), + multi_channel_capture_support(multi_channel_capture_support), + enforce_split_band_hpf(enforce_split_band_hpf), + minimize_processing_for_unused_output( + minimize_processing_for_unused_output), + transient_suppressor_forced_off(transient_suppressor_forced_off) {} + bool multi_channel_render_support; + bool multi_channel_capture_support; + bool enforce_split_band_hpf; + bool minimize_processing_for_unused_output; + bool transient_suppressor_forced_off; + } constants_; + + struct ApmCaptureState { + ApmCaptureState(); + ~ApmCaptureState(); + bool was_stream_delay_set; + bool capture_output_used; + bool capture_output_used_last_frame; + bool key_pressed; + std::unique_ptr capture_audio; + std::unique_ptr capture_fullband_audio; + std::unique_ptr linear_aec_output; + // Only the rate and samples fields of capture_processing_format_ are used + // because the capture processing number of channels is mutable and is + // tracked by the capture_audio_. + StreamConfig capture_processing_format; + int split_rate; + bool echo_path_gain_change; + float prev_pre_adjustment_gain; + int playout_volume; + int prev_playout_volume; + AudioProcessingStats stats; + // Input volume applied on the audio input device when the audio is + // acquired. Unspecified when unknown. + absl::optional applied_input_volume; + bool applied_input_volume_changed; + // Recommended input volume to apply on the audio input device the next time + // that audio is acquired. Unspecified when no input volume can be + // recommended. + absl::optional recommended_input_volume; + } capture_ RTC_GUARDED_BY(mutex_capture_); + + struct ApmCaptureNonLockedState { + ApmCaptureNonLockedState() + : capture_processing_format(kSampleRate16kHz), + split_rate(kSampleRate16kHz), + stream_delay_ms(0) {} + // Only the rate and samples fields of capture_processing_format_ are used + // because the forward processing number of channels is mutable and is + // tracked by the capture_audio_. + StreamConfig capture_processing_format; + int split_rate; + int stream_delay_ms; + bool echo_controller_enabled = false; + } capture_nonlocked_; + + struct ApmRenderState { + ApmRenderState(); + ~ApmRenderState(); + std::unique_ptr render_converter; + std::unique_ptr render_audio; + } render_ RTC_GUARDED_BY(mutex_render_); + + // Class for statistics reporting. The class is thread-safe and no lock is + // needed when accessing it. + class ApmStatsReporter { + public: + ApmStatsReporter(); + ~ApmStatsReporter(); + + // Returns the most recently reported statistics. + AudioProcessingStats GetStatistics(); + + // Update the cached statistics. 
+ void UpdateStatistics(const AudioProcessingStats& new_stats); + + private: + Mutex mutex_stats_; + AudioProcessingStats cached_stats_ RTC_GUARDED_BY(mutex_stats_); + SwapQueue stats_message_queue_; + } stats_reporter_; + + std::vector aecm_render_queue_buffer_ RTC_GUARDED_BY(mutex_render_); + std::vector aecm_capture_queue_buffer_ + RTC_GUARDED_BY(mutex_capture_); + + size_t agc_render_queue_element_max_size_ RTC_GUARDED_BY(mutex_render_) + RTC_GUARDED_BY(mutex_capture_) = 0; + std::vector agc_render_queue_buffer_ RTC_GUARDED_BY(mutex_render_); + std::vector agc_capture_queue_buffer_ RTC_GUARDED_BY(mutex_capture_); + + size_t red_render_queue_element_max_size_ RTC_GUARDED_BY(mutex_render_) + RTC_GUARDED_BY(mutex_capture_) = 0; + std::vector red_render_queue_buffer_ RTC_GUARDED_BY(mutex_render_); + std::vector red_capture_queue_buffer_ RTC_GUARDED_BY(mutex_capture_); + + RmsLevel capture_input_rms_ RTC_GUARDED_BY(mutex_capture_); + RmsLevel capture_output_rms_ RTC_GUARDED_BY(mutex_capture_); + int capture_rms_interval_counter_ RTC_GUARDED_BY(mutex_capture_) = 0; + + InputVolumeStatsReporter applied_input_volume_stats_reporter_ + RTC_GUARDED_BY(mutex_capture_); + InputVolumeStatsReporter recommended_input_volume_stats_reporter_ + RTC_GUARDED_BY(mutex_capture_); + + // Lock protection not needed. + std::unique_ptr< + SwapQueue, RenderQueueItemVerifier>> + aecm_render_signal_queue_; + std::unique_ptr< + SwapQueue, RenderQueueItemVerifier>> + agc_render_signal_queue_; + std::unique_ptr, RenderQueueItemVerifier>> + red_render_signal_queue_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc new file mode 100644 index 0000000000..3614b574df --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc @@ -0,0 +1,1012 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/audio_processing_impl.h" +#include "modules/audio_processing/test/audio_processing_builder_for_testing.h" +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/event.h" +#include "rtc_base/platform_thread.h" +#include "rtc_base/random.h" +#include "rtc_base/synchronization/mutex.h" +#include "system_wrappers/include/sleep.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +constexpr int kMaxFrameSize = 480; +constexpr TimeDelta kTestTimeOutLimit = TimeDelta::Minutes(10); + +class AudioProcessingImplLockTest; + +// Type of the render thread APM API call to use in the test. +enum class RenderApiImpl { + ProcessReverseStreamImplInteger, + ProcessReverseStreamImplFloat, + AnalyzeReverseStreamImplFloat, +}; + +// Type of the capture thread APM API call to use in the test. +enum class CaptureApiImpl { ProcessStreamImplInteger, ProcessStreamImplFloat }; + +// The runtime parameter setting scheme to use in the test. 
+enum class RuntimeParameterSettingScheme { + SparseStreamMetadataChangeScheme, + ExtremeStreamMetadataChangeScheme, + FixedMonoStreamMetadataScheme, + FixedStereoStreamMetadataScheme +}; + +// Variant of echo canceller settings to use in the test. +enum class AecType { + BasicWebRtcAecSettings, + AecTurnedOff, + BasicWebRtcAecSettingsWithExtentedFilter, + BasicWebRtcAecSettingsWithDelayAgnosticAec, + BasicWebRtcAecSettingsWithAecMobile +}; + +// Thread-safe random number generator wrapper. +class RandomGenerator { + public: + RandomGenerator() : rand_gen_(42U) {} + + int RandInt(int min, int max) { + MutexLock lock(&mutex_); + return rand_gen_.Rand(min, max); + } + + int RandInt(int max) { + MutexLock lock(&mutex_); + return rand_gen_.Rand(max); + } + + float RandFloat() { + MutexLock lock(&mutex_); + return rand_gen_.Rand(); + } + + private: + Mutex mutex_; + Random rand_gen_ RTC_GUARDED_BY(mutex_); +}; + +// Variables related to the audio data and formats. +struct AudioFrameData { + explicit AudioFrameData(int max_frame_size) { + // Set up the two-dimensional arrays needed for the APM API calls. + input_framechannels.resize(2 * max_frame_size); + input_frame.resize(2); + input_frame[0] = &input_framechannels[0]; + input_frame[1] = &input_framechannels[max_frame_size]; + + output_frame_channels.resize(2 * max_frame_size); + output_frame.resize(2); + output_frame[0] = &output_frame_channels[0]; + output_frame[1] = &output_frame_channels[max_frame_size]; + + frame.resize(2 * max_frame_size); + } + + std::vector frame; + + std::vector output_frame; + std::vector output_frame_channels; + std::vector input_frame; + std::vector input_framechannels; + + int input_sample_rate_hz = 16000; + int input_number_of_channels = 1; + int output_sample_rate_hz = 16000; + int output_number_of_channels = 1; +}; + +// The configuration for the test. +struct TestConfig { + // Test case generator for the test configurations to use in the brief tests. + static std::vector GenerateBriefTestConfigs() { + std::vector test_configs; + AecType aec_types[] = {AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec, + AecType::BasicWebRtcAecSettingsWithAecMobile}; + for (auto aec_type : aec_types) { + TestConfig test_config; + test_config.aec_type = aec_type; + + test_config.min_number_of_calls = 300; + + // Perform tests only with the extreme runtime parameter setting scheme. + test_config.runtime_parameter_setting_scheme = + RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme; + + // Only test 16 kHz for this test suite. + test_config.initial_sample_rate_hz = 16000; + + // Create test config for the Int16 processing API function set. + test_config.render_api_function = + RenderApiImpl::ProcessReverseStreamImplInteger; + test_config.capture_api_function = + CaptureApiImpl::ProcessStreamImplInteger; + test_configs.push_back(test_config); + + // Create test config for the StreamConfig processing API function set. + test_config.render_api_function = + RenderApiImpl::ProcessReverseStreamImplFloat; + test_config.capture_api_function = CaptureApiImpl::ProcessStreamImplFloat; + test_configs.push_back(test_config); + } + + // Return the created test configurations. + return test_configs; + } + + // Test case generator for the test configurations to use in the extensive + // tests. + static std::vector GenerateExtensiveTestConfigs() { + // Lambda functions for the test config generation. 
+ auto add_processing_apis = [](TestConfig test_config) { + struct AllowedApiCallCombinations { + RenderApiImpl render_api; + CaptureApiImpl capture_api; + }; + + const AllowedApiCallCombinations api_calls[] = { + {RenderApiImpl::ProcessReverseStreamImplInteger, + CaptureApiImpl::ProcessStreamImplInteger}, + {RenderApiImpl::ProcessReverseStreamImplFloat, + CaptureApiImpl::ProcessStreamImplFloat}, + {RenderApiImpl::AnalyzeReverseStreamImplFloat, + CaptureApiImpl::ProcessStreamImplFloat}, + {RenderApiImpl::ProcessReverseStreamImplInteger, + CaptureApiImpl::ProcessStreamImplFloat}, + {RenderApiImpl::ProcessReverseStreamImplFloat, + CaptureApiImpl::ProcessStreamImplInteger}}; + std::vector out; + for (auto api_call : api_calls) { + test_config.render_api_function = api_call.render_api; + test_config.capture_api_function = api_call.capture_api; + out.push_back(test_config); + } + return out; + }; + + auto add_aec_settings = [](const std::vector& in) { + std::vector out; + AecType aec_types[] = { + AecType::BasicWebRtcAecSettings, AecType::AecTurnedOff, + AecType::BasicWebRtcAecSettingsWithExtentedFilter, + AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec, + AecType::BasicWebRtcAecSettingsWithAecMobile}; + for (auto test_config : in) { + // Due to a VisualStudio 2015 compiler issue, the internal loop + // variable here cannot override a previously defined name. + // In other words "type" cannot be named "aec_type" here. + // https://connect.microsoft.com/VisualStudio/feedback/details/2291755 + for (auto type : aec_types) { + test_config.aec_type = type; + out.push_back(test_config); + } + } + return out; + }; + + auto add_settings_scheme = [](const std::vector& in) { + std::vector out; + RuntimeParameterSettingScheme schemes[] = { + RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme, + RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme, + RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme, + RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme}; + + for (auto test_config : in) { + for (auto scheme : schemes) { + test_config.runtime_parameter_setting_scheme = scheme; + out.push_back(test_config); + } + } + return out; + }; + + auto add_sample_rates = [](const std::vector& in) { + const int sample_rates[] = {8000, 16000, 32000, 48000}; + + std::vector out; + for (auto test_config : in) { + auto available_rates = + (test_config.aec_type == + AecType::BasicWebRtcAecSettingsWithAecMobile + ? rtc::ArrayView(sample_rates, 2) + : rtc::ArrayView(sample_rates)); + + for (auto rate : available_rates) { + test_config.initial_sample_rate_hz = rate; + out.push_back(test_config); + } + } + return out; + }; + + // Generate test configurations of the relevant combinations of the + // parameters to + // test. + TestConfig test_config; + test_config.min_number_of_calls = 10000; + return add_sample_rates(add_settings_scheme( + add_aec_settings(add_processing_apis(test_config)))); + } + + RenderApiImpl render_api_function = + RenderApiImpl::ProcessReverseStreamImplFloat; + CaptureApiImpl capture_api_function = CaptureApiImpl::ProcessStreamImplFloat; + RuntimeParameterSettingScheme runtime_parameter_setting_scheme = + RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme; + int initial_sample_rate_hz = 16000; + AecType aec_type = AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec; + int min_number_of_calls = 300; +}; + +// Handler for the frame counters. 
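GenerateExtensiveTestConfigs() above builds the full parameter matrix by piping a seed config through a chain of expander lambdas, each multiplying the set by one dimension. The same composition idea in a compact, generic form (Cfg and the value sets are illustrative, not the test's real parameters); the frame-counter handler announced above follows the sketch:

#include <vector>

struct Cfg {
  int rate = 16000;
  int channels = 1;
};

std::vector<Cfg> ExpandRates(const std::vector<Cfg>& in) {
  std::vector<Cfg> out;
  for (Cfg c : in) {
    for (int rate : {8000, 16000, 32000, 48000}) {
      c.rate = rate;
      out.push_back(c);
    }
  }
  return out;
}

std::vector<Cfg> ExpandChannels(const std::vector<Cfg>& in) {
  std::vector<Cfg> out;
  for (Cfg c : in) {
    for (int ch : {1, 2}) {
      c.channels = ch;
      out.push_back(c);
    }
  }
  return out;
}

// Usage: ExpandChannels(ExpandRates({Cfg{}})) yields all 8 combinations.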
+class FrameCounters { + public: + void IncreaseRenderCounter() { + MutexLock lock(&mutex_); + render_count++; + } + + void IncreaseCaptureCounter() { + MutexLock lock(&mutex_); + capture_count++; + } + + int GetCaptureCounter() const { + MutexLock lock(&mutex_); + return capture_count; + } + + int GetRenderCounter() const { + MutexLock lock(&mutex_); + return render_count; + } + + int CaptureMinusRenderCounters() const { + MutexLock lock(&mutex_); + return capture_count - render_count; + } + + int RenderMinusCaptureCounters() const { + return -CaptureMinusRenderCounters(); + } + + bool BothCountersExceedeThreshold(int threshold) { + MutexLock lock(&mutex_); + return (render_count > threshold && capture_count > threshold); + } + + private: + mutable Mutex mutex_; + int render_count RTC_GUARDED_BY(mutex_) = 0; + int capture_count RTC_GUARDED_BY(mutex_) = 0; +}; + +// Class for handling the capture side processing. +class CaptureProcessor { + public: + CaptureProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + const TestConfig* test_config, + AudioProcessing* apm); + void Process(); + + private: + static constexpr int kMaxCallDifference = 10; + static constexpr float kCaptureInputFloatLevel = 0.03125f; + static constexpr int kCaptureInputFixLevel = 1024; + + void PrepareFrame(); + void CallApmCaptureSide(); + void ApplyRuntimeSettingScheme(); + + RandomGenerator* const rand_gen_ = nullptr; + rtc::Event* const render_call_event_ = nullptr; + rtc::Event* const capture_call_event_ = nullptr; + FrameCounters* const frame_counters_ = nullptr; + const TestConfig* const test_config_ = nullptr; + AudioProcessing* const apm_ = nullptr; + AudioFrameData frame_data_; +}; + +// Class for handling the stats processing. +class StatsProcessor { + public: + StatsProcessor(RandomGenerator* rand_gen, + const TestConfig* test_config, + AudioProcessing* apm); + void Process(); + + private: + RandomGenerator* rand_gen_ = nullptr; + const TestConfig* const test_config_ = nullptr; + AudioProcessing* apm_ = nullptr; +}; + +// Class for handling the render side processing. +class RenderProcessor { + public: + RenderProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + const TestConfig* test_config, + AudioProcessing* apm); + void Process(); + + private: + static constexpr int kMaxCallDifference = 10; + static constexpr int kRenderInputFixLevel = 16384; + static constexpr float kRenderInputFloatLevel = 0.5f; + + void PrepareFrame(); + void CallApmRenderSide(); + void ApplyRuntimeSettingScheme(); + + RandomGenerator* const rand_gen_ = nullptr; + rtc::Event* const render_call_event_ = nullptr; + rtc::Event* const capture_call_event_ = nullptr; + FrameCounters* const frame_counters_ = nullptr; + const TestConfig* const test_config_ = nullptr; + AudioProcessing* const apm_ = nullptr; + AudioFrameData frame_data_; + bool first_render_call_ = true; +}; + +class AudioProcessingImplLockTest + : public ::testing::TestWithParam { + public: + AudioProcessingImplLockTest(); + bool RunTest(); + bool MaybeEndTest(); + + private: + void SetUp() override; + void TearDown() override; + + // Tests whether all the required render and capture side calls have been + // done. 
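FrameCounters above, together with the render/capture events used by the processors, keeps the two real-time threads within kMaxCallDifference calls of each other and makes render wait for the first capture call. A condensed single-object version of that pacing logic using one condition variable (a sketch, not the rtc::Event mechanism the test actually uses); the test fixture continues below:

#include <condition_variable>
#include <mutex>

struct Pacer {
  static constexpr int kMaxCallDifference = 10;

  void CaptureTick() {
    std::unique_lock<std::mutex> lock(m);
    cv.wait(lock, [&] { return capture - render <= kMaxCallDifference; });
    ++capture;
    cv.notify_all();
  }

  void RenderTick() {
    std::unique_lock<std::mutex> lock(m);
    // Also gates the very first render call on capture having run once.
    cv.wait(lock, [&] {
      return capture > 0 && render - capture <= kMaxCallDifference;
    });
    ++render;
    cv.notify_all();
  }

  std::mutex m;
  std::condition_variable cv;
  int render = 0;
  int capture = 0;
};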
+ bool TestDone() { + return frame_counters_.BothCountersExceedeThreshold( + test_config_.min_number_of_calls); + } + + // Start the threads used in the test. + void StartThreads() { + const auto attributes = + rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime); + render_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (!MaybeEndTest()) + render_thread_state_.Process(); + }, + "render", attributes); + capture_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (!MaybeEndTest()) { + capture_thread_state_.Process(); + } + }, + "capture", attributes); + + stats_thread_ = rtc::PlatformThread::SpawnJoinable( + [this] { + while (!MaybeEndTest()) + stats_thread_state_.Process(); + }, + "stats", attributes); + } + + // Event handlers for the test. + rtc::Event test_complete_; + rtc::Event render_call_event_; + rtc::Event capture_call_event_; + + // Thread related variables. + mutable RandomGenerator rand_gen_; + + const TestConfig test_config_; + rtc::scoped_refptr apm_; + FrameCounters frame_counters_; + RenderProcessor render_thread_state_; + CaptureProcessor capture_thread_state_; + StatsProcessor stats_thread_state_; + rtc::PlatformThread render_thread_; + rtc::PlatformThread capture_thread_; + rtc::PlatformThread stats_thread_; +}; + +// Sleeps a random time between 0 and max_sleep milliseconds. +void SleepRandomMs(int max_sleep, RandomGenerator* rand_gen) { + int sleeptime = rand_gen->RandInt(0, max_sleep); + SleepMs(sleeptime); +} + +// Populates a float audio frame with random data. +void PopulateAudioFrame(float** frame, + float amplitude, + size_t num_channels, + size_t samples_per_channel, + RandomGenerator* rand_gen) { + for (size_t ch = 0; ch < num_channels; ch++) { + for (size_t k = 0; k < samples_per_channel; k++) { + // Store random 16 bit quantized float number between +-amplitude. + frame[ch][k] = amplitude * (2 * rand_gen->RandFloat() - 1); + } + } +} + +// Populates an integer audio frame with random data. +void PopulateAudioFrame(float amplitude, + size_t num_channels, + size_t samples_per_channel, + rtc::ArrayView frame, + RandomGenerator* rand_gen) { + ASSERT_GT(amplitude, 0); + ASSERT_LE(amplitude, 32767); + for (size_t ch = 0; ch < num_channels; ch++) { + for (size_t k = 0; k < samples_per_channel; k++) { + // Store random 16 bit number between -(amplitude+1) and + // amplitude. 
+ frame[k * ch] = rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1; + } + } +} + +AudioProcessing::Config GetApmTestConfig(AecType aec_type) { + AudioProcessing::Config apm_config; + apm_config.echo_canceller.enabled = aec_type != AecType::AecTurnedOff; + apm_config.echo_canceller.mobile_mode = + aec_type == AecType::BasicWebRtcAecSettingsWithAecMobile; + apm_config.gain_controller1.enabled = true; + apm_config.gain_controller1.mode = + AudioProcessing::Config::GainController1::kAdaptiveDigital; + apm_config.noise_suppression.enabled = true; + return apm_config; +} + +AudioProcessingImplLockTest::AudioProcessingImplLockTest() + : test_config_(GetParam()), + apm_(AudioProcessingBuilderForTesting() + .SetConfig(GetApmTestConfig(test_config_.aec_type)) + .Create()), + render_thread_state_(kMaxFrameSize, + &rand_gen_, + &render_call_event_, + &capture_call_event_, + &frame_counters_, + &test_config_, + apm_.get()), + capture_thread_state_(kMaxFrameSize, + &rand_gen_, + &render_call_event_, + &capture_call_event_, + &frame_counters_, + &test_config_, + apm_.get()), + stats_thread_state_(&rand_gen_, &test_config_, apm_.get()) {} + +// Run the test with a timeout. +bool AudioProcessingImplLockTest::RunTest() { + StartThreads(); + return test_complete_.Wait(kTestTimeOutLimit); +} + +bool AudioProcessingImplLockTest::MaybeEndTest() { + if (HasFatalFailure() || TestDone()) { + test_complete_.Set(); + return true; + } + return false; +} + +void AudioProcessingImplLockTest::SetUp() {} + +void AudioProcessingImplLockTest::TearDown() { + render_call_event_.Set(); + capture_call_event_.Set(); +} + +StatsProcessor::StatsProcessor(RandomGenerator* rand_gen, + const TestConfig* test_config, + AudioProcessing* apm) + : rand_gen_(rand_gen), test_config_(test_config), apm_(apm) {} + +// Implements the callback functionality for the statistics +// collection thread. +void StatsProcessor::Process() { + SleepRandomMs(100, rand_gen_); + + AudioProcessing::Config apm_config = apm_->GetConfig(); + if (test_config_->aec_type != AecType::AecTurnedOff) { + EXPECT_TRUE(apm_config.echo_canceller.enabled); + EXPECT_EQ(apm_config.echo_canceller.mobile_mode, + (test_config_->aec_type == + AecType::BasicWebRtcAecSettingsWithAecMobile)); + } else { + EXPECT_FALSE(apm_config.echo_canceller.enabled); + } + EXPECT_TRUE(apm_config.gain_controller1.enabled); + EXPECT_TRUE(apm_config.noise_suppression.enabled); + + // The below return value is not testable. + apm_->GetStatistics(); +} + +CaptureProcessor::CaptureProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + const TestConfig* test_config, + AudioProcessing* apm) + : rand_gen_(rand_gen), + render_call_event_(render_call_event), + capture_call_event_(capture_call_event), + frame_counters_(shared_counters_state), + test_config_(test_config), + apm_(apm), + frame_data_(max_frame_size) {} + +// Implements the callback functionality for the capture thread. +void CaptureProcessor::Process() { + // Sleep a random time to simulate thread jitter. + SleepRandomMs(3, rand_gen_); + + // Ensure that the number of render and capture calls do not + // differ too much. + if (frame_counters_->CaptureMinusRenderCounters() > kMaxCallDifference) { + render_call_event_->Wait(rtc::Event::kForever); + } + + // Apply any specified capture side APM non-processing runtime calls. + ApplyRuntimeSettingScheme(); + + // Apply the capture side processing call. 
CallApmCaptureSide();
+
+  // Increase the number of capture-side calls.
+  frame_counters_->IncreaseCaptureCounter();
+
+  // Flag to the render thread that another capture API call has occurred
+  // by triggering this thread's call event.
+  capture_call_event_->Set();
+}
+
+// Prepares a frame with relevant audio data and metadata.
+void CaptureProcessor::PrepareFrame() {
+  // Restrict to a common fixed sample rate if the integer
+  // interface is used.
+  if (test_config_->capture_api_function ==
+      CaptureApiImpl::ProcessStreamImplInteger) {
+    frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz;
+    frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz;
+  }
+
+  // Prepare the audio data.
+  StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
+                                   frame_data_.input_number_of_channels);
+
+  PopulateAudioFrame(kCaptureInputFixLevel, input_stream_config.num_channels(),
+                     input_stream_config.num_frames(), frame_data_.frame,
+                     rand_gen_);
+
+  PopulateAudioFrame(&frame_data_.input_frame[0], kCaptureInputFloatLevel,
+                     input_stream_config.num_channels(),
+                     input_stream_config.num_frames(), rand_gen_);
+}
+
+// Applies the capture side processing API call.
+void CaptureProcessor::CallApmCaptureSide() {
+  // Prepare a proper capture side processing API call input.
+  PrepareFrame();
+
+  // Set the stream delay.
+  apm_->set_stream_delay_ms(30);
+
+  // Set the analog level.
+  apm_->set_stream_analog_level(80);
+
+  // Call the specified capture side API processing method.
+  StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
+                                   frame_data_.input_number_of_channels);
+  StreamConfig output_stream_config(frame_data_.output_sample_rate_hz,
+                                    frame_data_.output_number_of_channels);
+  int result = AudioProcessing::kNoError;
+  switch (test_config_->capture_api_function) {
+    case CaptureApiImpl::ProcessStreamImplInteger:
+      result =
+          apm_->ProcessStream(frame_data_.frame.data(), input_stream_config,
+                              output_stream_config, frame_data_.frame.data());
+      break;
+    case CaptureApiImpl::ProcessStreamImplFloat:
+      result = apm_->ProcessStream(&frame_data_.input_frame[0],
+                                   input_stream_config, output_stream_config,
+                                   &frame_data_.output_frame[0]);
+      break;
+    default:
+      FAIL();
+  }
+
+  // Retrieve the new analog level.
+  apm_->recommended_stream_analog_level();
+
+  // Check the return code for error.
+  ASSERT_EQ(AudioProcessing::kNoError, result);
+}
+
+// Applies any runtime capture APM API calls and audio stream characteristics
+// specified by the scheme for the test.
+void CaptureProcessor::ApplyRuntimeSettingScheme() {
+  const int capture_count_local = frame_counters_->GetCaptureCounter();
+
+  // Update the number of channels and sample rates for the input and output.
+  // Note that the frequencies at which the parameters are changed are chosen
+  // to be prime numbers, so that the permutation pattern of the parameter
+  // settings keeps varying instead of repeating in a short cycle.
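CallApmCaptureSide() above exercises the canonical per-frame capture sequence required by the APM API: the stream delay and analog level are set before ProcessStream(), and the recommended level is read afterwards. The same order against the public API, in a standalone helper (the helper name and its arguments are illustrative; this assumes the WebRTC headers); the scheme's switch statement follows below:

#include "modules/audio_processing/include/audio_processing.h"

int ProcessCaptureFrame(webrtc::AudioProcessing& apm,
                        float* const* channels,
                        const webrtc::StreamConfig& config,
                        int mic_level) {
  apm.set_stream_delay_ms(30);             // Estimated render-to-capture delay.
  apm.set_stream_analog_level(mic_level);  // Current device input volume.
  const int err = apm.ProcessStream(channels, config, config, channels);
  // New recommended device volume to apply before the next frame.
  const int new_level = apm.recommended_stream_analog_level();
  static_cast<void>(new_level);
  return err;
}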
+ switch (test_config_->runtime_parameter_setting_scheme) { + case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme: + if (capture_count_local == 0) + frame_data_.input_sample_rate_hz = 16000; + else if (capture_count_local % 11 == 0) + frame_data_.input_sample_rate_hz = 32000; + else if (capture_count_local % 73 == 0) + frame_data_.input_sample_rate_hz = 48000; + else if (capture_count_local % 89 == 0) + frame_data_.input_sample_rate_hz = 16000; + else if (capture_count_local % 97 == 0) + frame_data_.input_sample_rate_hz = 8000; + + if (capture_count_local == 0) + frame_data_.input_number_of_channels = 1; + else if (capture_count_local % 4 == 0) + frame_data_.input_number_of_channels = + (frame_data_.input_number_of_channels == 1 ? 2 : 1); + + if (capture_count_local == 0) + frame_data_.output_sample_rate_hz = 16000; + else if (capture_count_local % 5 == 0) + frame_data_.output_sample_rate_hz = 32000; + else if (capture_count_local % 47 == 0) + frame_data_.output_sample_rate_hz = 48000; + else if (capture_count_local % 53 == 0) + frame_data_.output_sample_rate_hz = 16000; + else if (capture_count_local % 71 == 0) + frame_data_.output_sample_rate_hz = 8000; + + if (capture_count_local == 0) + frame_data_.output_number_of_channels = 1; + else if (capture_count_local % 8 == 0) + frame_data_.output_number_of_channels = + (frame_data_.output_number_of_channels == 1 ? 2 : 1); + break; + case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme: + if (capture_count_local % 2 == 0) { + frame_data_.input_number_of_channels = 1; + frame_data_.input_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 1; + frame_data_.output_sample_rate_hz = 16000; + } else { + frame_data_.input_number_of_channels = + (frame_data_.input_number_of_channels == 1 ? 2 : 1); + if (frame_data_.input_sample_rate_hz == 8000) + frame_data_.input_sample_rate_hz = 16000; + else if (frame_data_.input_sample_rate_hz == 16000) + frame_data_.input_sample_rate_hz = 32000; + else if (frame_data_.input_sample_rate_hz == 32000) + frame_data_.input_sample_rate_hz = 48000; + else if (frame_data_.input_sample_rate_hz == 48000) + frame_data_.input_sample_rate_hz = 8000; + + frame_data_.output_number_of_channels = + (frame_data_.output_number_of_channels == 1 ? 2 : 1); + if (frame_data_.output_sample_rate_hz == 8000) + frame_data_.output_sample_rate_hz = 16000; + else if (frame_data_.output_sample_rate_hz == 16000) + frame_data_.output_sample_rate_hz = 32000; + else if (frame_data_.output_sample_rate_hz == 32000) + frame_data_.output_sample_rate_hz = 48000; + else if (frame_data_.output_sample_rate_hz == 48000) + frame_data_.output_sample_rate_hz = 8000; + } + break; + case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme: + if (capture_count_local == 0) { + frame_data_.input_sample_rate_hz = 16000; + frame_data_.input_number_of_channels = 1; + frame_data_.output_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 1; + } + break; + case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme: + if (capture_count_local == 0) { + frame_data_.input_sample_rate_hz = 16000; + frame_data_.input_number_of_channels = 2; + frame_data_.output_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 2; + } + break; + default: + FAIL(); + } + + // Call any specified runtime APM setter and + // getter calls. 
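The prime-valued periods used above (and in the switch that follows) are what keep the parameter changes from locking into a short cycle: pairwise-coprime periods only realign at their product, so far more phase combinations get exercised. A toy demonstration with periods 5 and 7:

#include <cstdio>

int main() {
  int a = 0;  // Stands in for one stream parameter (toggled every 5th call).
  int b = 0;  // Stands in for another (toggled every 7th call).
  for (int n = 1; n <= 35; ++n) {
    if (n % 5 == 0) a ^= 1;
    if (n % 7 == 0) b ^= 1;
    std::printf("call %2d: a=%d b=%d\n", n, a, b);
  }
  // The joint (a, b) pattern has period 35, not 5 or 7.
  return 0;
}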
switch (test_config_->runtime_parameter_setting_scheme) {
+    case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme:
+    case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme:
+      break;
+    case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme:
+    case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme:
+      if (capture_count_local % 2 == 0) {
+        ASSERT_EQ(AudioProcessing::Error::kNoError,
+                  apm_->set_stream_delay_ms(30));
+        apm_->set_stream_key_pressed(true);
+      } else {
+        ASSERT_EQ(AudioProcessing::Error::kNoError,
+                  apm_->set_stream_delay_ms(50));
+        apm_->set_stream_key_pressed(false);
+      }
+      break;
+    default:
+      FAIL();
+  }
+
+  // Restrict the number of output channels not to exceed
+  // the number of input channels.
+  frame_data_.output_number_of_channels =
+      std::min(frame_data_.output_number_of_channels,
+               frame_data_.input_number_of_channels);
+}
+
+RenderProcessor::RenderProcessor(int max_frame_size,
+                                 RandomGenerator* rand_gen,
+                                 rtc::Event* render_call_event,
+                                 rtc::Event* capture_call_event,
+                                 FrameCounters* shared_counters_state,
+                                 const TestConfig* test_config,
+                                 AudioProcessing* apm)
+    : rand_gen_(rand_gen),
+      render_call_event_(render_call_event),
+      capture_call_event_(capture_call_event),
+      frame_counters_(shared_counters_state),
+      test_config_(test_config),
+      apm_(apm),
+      frame_data_(max_frame_size) {}
+
+// Implements the callback functionality for the render thread.
+void RenderProcessor::Process() {
+  // Conditional wait to ensure that a capture call has been done
+  // before the first render call is performed (implicitly
+  // required by the APM API).
+  if (first_render_call_) {
+    capture_call_event_->Wait(rtc::Event::kForever);
+    first_render_call_ = false;
+  }
+
+  // Sleep a random time to simulate thread jitter.
+  SleepRandomMs(3, rand_gen_);
+
+  // Ensure that the number of render and capture calls do not
+  // differ too much.
+  if (frame_counters_->RenderMinusCaptureCounters() > kMaxCallDifference) {
+    capture_call_event_->Wait(rtc::Event::kForever);
+  }
+
+  // Apply any specified render side APM non-processing runtime calls.
+  ApplyRuntimeSettingScheme();
+
+  // Apply the render side processing call.
+  CallApmRenderSide();
+
+  // Increase the number of render-side calls.
+  frame_counters_->IncreaseRenderCounter();
+
+  // Flag to the capture thread that another render API call has occurred
+  // by triggering this thread's call event.
+  render_call_event_->Set();
+}
+
+// Prepares the render side frame and the accompanying metadata
+// with the appropriate information.
+void RenderProcessor::PrepareFrame() {
+  // Restrict to a common fixed sample rate if the integer interface is
+  // used or if the mobile AEC is not used.
+  if ((test_config_->render_api_function ==
+       RenderApiImpl::ProcessReverseStreamImplInteger) ||
+      (test_config_->aec_type !=
+       AecType::BasicWebRtcAecSettingsWithAecMobile)) {
+    frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz;
+    frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz;
+  }
+
+  // Prepare the audio data.
+  StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
+                                   frame_data_.input_number_of_channels);
+
+  PopulateAudioFrame(kRenderInputFixLevel, input_stream_config.num_channels(),
+                     input_stream_config.num_frames(), frame_data_.frame,
+                     rand_gen_);
+
+  PopulateAudioFrame(&frame_data_.input_frame[0], kRenderInputFloatLevel,
+                     input_stream_config.num_channels(),
+                     input_stream_config.num_frames(), rand_gen_);
+}
+
+// Makes the render side processing API call.
+void RenderProcessor::CallApmRenderSide() { + // Prepare a proper render side processing API call input. + PrepareFrame(); + + // Call the specified render side API processing method. + StreamConfig input_stream_config(frame_data_.input_sample_rate_hz, + frame_data_.input_number_of_channels); + StreamConfig output_stream_config(frame_data_.output_sample_rate_hz, + frame_data_.output_number_of_channels); + int result = AudioProcessing::kNoError; + switch (test_config_->render_api_function) { + case RenderApiImpl::ProcessReverseStreamImplInteger: + result = apm_->ProcessReverseStream( + frame_data_.frame.data(), input_stream_config, output_stream_config, + frame_data_.frame.data()); + break; + case RenderApiImpl::ProcessReverseStreamImplFloat: + result = apm_->ProcessReverseStream( + &frame_data_.input_frame[0], input_stream_config, + output_stream_config, &frame_data_.output_frame[0]); + break; + case RenderApiImpl::AnalyzeReverseStreamImplFloat: + result = apm_->AnalyzeReverseStream(&frame_data_.input_frame[0], + input_stream_config); + break; + default: + FAIL(); + } + + // Check the return code for error. + ASSERT_EQ(AudioProcessing::kNoError, result); +} + +// Applies any render capture side APM API calls and audio stream +// characteristics +// specified by the scheme for the test. +void RenderProcessor::ApplyRuntimeSettingScheme() { + const int render_count_local = frame_counters_->GetRenderCounter(); + + // Update the number of channels and sample rates for the input and output. + // Note that the counts frequencies for when to set parameters + // are set using prime numbers in order to ensure that the + // permutation scheme in the parameter setting changes. + switch (test_config_->runtime_parameter_setting_scheme) { + case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme: + if (render_count_local == 0) + frame_data_.input_sample_rate_hz = 16000; + else if (render_count_local % 47 == 0) + frame_data_.input_sample_rate_hz = 32000; + else if (render_count_local % 71 == 0) + frame_data_.input_sample_rate_hz = 48000; + else if (render_count_local % 79 == 0) + frame_data_.input_sample_rate_hz = 16000; + else if (render_count_local % 83 == 0) + frame_data_.input_sample_rate_hz = 8000; + + if (render_count_local == 0) + frame_data_.input_number_of_channels = 1; + else if (render_count_local % 4 == 0) + frame_data_.input_number_of_channels = + (frame_data_.input_number_of_channels == 1 ? 2 : 1); + + if (render_count_local == 0) + frame_data_.output_sample_rate_hz = 16000; + else if (render_count_local % 17 == 0) + frame_data_.output_sample_rate_hz = 32000; + else if (render_count_local % 19 == 0) + frame_data_.output_sample_rate_hz = 48000; + else if (render_count_local % 29 == 0) + frame_data_.output_sample_rate_hz = 16000; + else if (render_count_local % 61 == 0) + frame_data_.output_sample_rate_hz = 8000; + + if (render_count_local == 0) + frame_data_.output_number_of_channels = 1; + else if (render_count_local % 8 == 0) + frame_data_.output_number_of_channels = + (frame_data_.output_number_of_channels == 1 ? 2 : 1); + break; + case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme: + if (render_count_local == 0) { + frame_data_.input_number_of_channels = 1; + frame_data_.input_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 1; + frame_data_.output_sample_rate_hz = 16000; + } else { + frame_data_.input_number_of_channels = + (frame_data_.input_number_of_channels == 1 ? 
2 : 1); + if (frame_data_.input_sample_rate_hz == 8000) + frame_data_.input_sample_rate_hz = 16000; + else if (frame_data_.input_sample_rate_hz == 16000) + frame_data_.input_sample_rate_hz = 32000; + else if (frame_data_.input_sample_rate_hz == 32000) + frame_data_.input_sample_rate_hz = 48000; + else if (frame_data_.input_sample_rate_hz == 48000) + frame_data_.input_sample_rate_hz = 8000; + + frame_data_.output_number_of_channels = + (frame_data_.output_number_of_channels == 1 ? 2 : 1); + if (frame_data_.output_sample_rate_hz == 8000) + frame_data_.output_sample_rate_hz = 16000; + else if (frame_data_.output_sample_rate_hz == 16000) + frame_data_.output_sample_rate_hz = 32000; + else if (frame_data_.output_sample_rate_hz == 32000) + frame_data_.output_sample_rate_hz = 48000; + else if (frame_data_.output_sample_rate_hz == 48000) + frame_data_.output_sample_rate_hz = 8000; + } + break; + case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme: + if (render_count_local == 0) { + frame_data_.input_sample_rate_hz = 16000; + frame_data_.input_number_of_channels = 1; + frame_data_.output_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 1; + } + break; + case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme: + if (render_count_local == 0) { + frame_data_.input_sample_rate_hz = 16000; + frame_data_.input_number_of_channels = 2; + frame_data_.output_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 2; + } + break; + default: + FAIL(); + } + + // Restric the number of output channels not to exceed + // the number of input channels. + frame_data_.output_number_of_channels = + std::min(frame_data_.output_number_of_channels, + frame_data_.input_number_of_channels); +} + +} // namespace + +TEST_P(AudioProcessingImplLockTest, LockTest) { + // Run test and verify that it did not time out. + ASSERT_TRUE(RunTest()); +} + +// Instantiate tests from the extreme test configuration set. +INSTANTIATE_TEST_SUITE_P( + DISABLED_AudioProcessingImplLockExtensive, + AudioProcessingImplLockTest, + ::testing::ValuesIn(TestConfig::GenerateExtensiveTestConfigs())); + +INSTANTIATE_TEST_SUITE_P( + AudioProcessingImplLockBrief, + AudioProcessingImplLockTest, + ::testing::ValuesIn(TestConfig::GenerateBriefTestConfigs())); + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_unittest.cc new file mode 100644 index 0000000000..7c12a07ed9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_impl_unittest.cc @@ -0,0 +1,1569 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/audio_processing_impl.h" + +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/make_ref_counted.h" +#include "api/scoped_refptr.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/optionally_built_submodule_creators.h" +#include "modules/audio_processing/test/audio_processing_builder_for_testing.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "modules/audio_processing/test/echo_control_mock.h" +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/checks.h" +#include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" +#include "test/field_trial.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::Invoke; +using ::testing::NotNull; + +class MockInitialize : public AudioProcessingImpl { + public: + MockInitialize() : AudioProcessingImpl() {} + + MOCK_METHOD(void, InitializeLocked, (), (override)); + void RealInitializeLocked() { + AssertLockedForTest(); + AudioProcessingImpl::InitializeLocked(); + } + + MOCK_METHOD(void, AddRef, (), (const, override)); + MOCK_METHOD(rtc::RefCountReleaseStatus, Release, (), (const, override)); +}; + +// Creates MockEchoControl instances and provides a raw pointer access to +// the next created one. The raw pointer is meant to be used with gmock. +// Returning a pointer of the next created MockEchoControl instance is necessary +// for the following reasons: (i) gmock expectations must be set before any call +// occurs, (ii) APM is initialized the first time that +// AudioProcessingImpl::ProcessStream() is called and the initialization leads +// to the creation of a new EchoControl object. +class MockEchoControlFactory : public EchoControlFactory { + public: + MockEchoControlFactory() : next_mock_(std::make_unique()) {} + // Returns a pointer to the next MockEchoControl that this factory creates. + MockEchoControl* GetNext() const { return next_mock_.get(); } + std::unique_ptr Create(int sample_rate_hz, + int num_render_channels, + int num_capture_channels) override { + std::unique_ptr mock = std::move(next_mock_); + next_mock_ = std::make_unique(); + return mock; + } + + private: + std::unique_ptr next_mock_; +}; + +// Mocks EchoDetector and records the first samples of the last analyzed render +// stream frame. Used to check what data is read by an EchoDetector +// implementation injected into an APM. +class TestEchoDetector : public EchoDetector { + public: + TestEchoDetector() + : analyze_render_audio_called_(false), + last_render_audio_first_sample_(0.f) {} + ~TestEchoDetector() override = default; + void AnalyzeRenderAudio(rtc::ArrayView render_audio) override { + last_render_audio_first_sample_ = render_audio[0]; + analyze_render_audio_called_ = true; + } + void AnalyzeCaptureAudio(rtc::ArrayView capture_audio) override { + } + void Initialize(int capture_sample_rate_hz, + int num_capture_channels, + int render_sample_rate_hz, + int num_render_channels) override {} + EchoDetector::Metrics GetMetrics() const override { return {}; } + // Returns true if AnalyzeRenderAudio() has been called at least once. + bool analyze_render_audio_called() const { + return analyze_render_audio_called_; + } + // Returns the first sample of the last analyzed render frame. 
float last_render_audio_first_sample() const {
+    return last_render_audio_first_sample_;
+  }
+
+ private:
+  bool analyze_render_audio_called_;
+  float last_render_audio_first_sample_;
+};
+
+// Mocks CustomProcessing and applies ProcessSample() to all the samples.
+// Meant to be injected into an APM to modify samples in a known and detectable
+// way.
+class TestRenderPreProcessor : public CustomProcessing {
+ public:
+  TestRenderPreProcessor() = default;
+  ~TestRenderPreProcessor() = default;
+  void Initialize(int sample_rate_hz, int num_channels) override {}
+  void Process(AudioBuffer* audio) override {
+    for (size_t k = 0; k < audio->num_channels(); ++k) {
+      rtc::ArrayView<float> channel_view(audio->channels()[k],
+                                         audio->num_frames());
+      std::transform(channel_view.begin(), channel_view.end(),
+                     channel_view.begin(), ProcessSample);
+    }
+  }
+  std::string ToString() const override { return "TestRenderPreProcessor"; }
+  void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting) override {}
+  // Modifies a sample. This member is used in Process() to modify a frame and
+  // it is publicly visible to enable tests.
+  static constexpr float ProcessSample(float x) { return 2.f * x; }
+};
+
+// Runs `apm` input processing for volume adjustments for `num_frames` random
+// frames starting from the volume `initial_volume`. This includes three steps:
+// 1) Set the input volume 2) Process the stream 3) Feed the new recommended
+// input volume back as the next applied volume. Returns the last recommended
+// input volume.
+int ProcessInputVolume(AudioProcessing& apm,
+                       int num_frames,
+                       int initial_volume) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(/*sample_rate_hz=*/kSampleRateHz,
+                             /*num_channels=*/kNumChannels);
+  int recommended_input_volume = initial_volume;
+  for (int i = 0; i < num_frames; ++i) {
+    Random random_generator(2341U);
+    RandomizeSampleVector(&random_generator, buffer);
+
+    apm.set_stream_analog_level(recommended_input_volume);
+    apm.ProcessStream(channel_pointers, stream_config, stream_config,
+                      channel_pointers);
+    recommended_input_volume = apm.recommended_stream_analog_level();
+  }
+  return recommended_input_volume;
+}
+
+}  // namespace
+
+TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) {
+  MockInitialize mock;
+  ON_CALL(mock, InitializeLocked)
+      .WillByDefault(Invoke(&mock, &MockInitialize::RealInitializeLocked));
+
+  EXPECT_CALL(mock, InitializeLocked).Times(1);
+  mock.Initialize();
+
+  constexpr size_t kMaxSampleRateHz = 32000;
+  constexpr size_t kMaxNumChannels = 2;
+  std::array<int16_t, kMaxNumChannels * kMaxSampleRateHz / 100> frame;
+  frame.fill(0);
+  StreamConfig config(16000, 1);
+  // Call with the default parameters; there should be no init.
+  EXPECT_CALL(mock, InitializeLocked).Times(0);
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
+  EXPECT_NOERR(
+      mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
+
+  // New sample rate. (Only impacts ProcessStream).
+  config = StreamConfig(32000, 1);
+  EXPECT_CALL(mock, InitializeLocked).Times(1);
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
+
+  // New number of channels.
+  config = StreamConfig(32000, 2);
+  EXPECT_CALL(mock, InitializeLocked).Times(2);
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
+  EXPECT_NOERR(
+      mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
+
+  // A new sample rate passed to ProcessReverseStream should cause an init.
+  config = StreamConfig(16000, 2);
+  EXPECT_CALL(mock, InitializeLocked).Times(1);
+  EXPECT_NOERR(
+      mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
+}
+
+TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) {
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting().Create();
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.pre_amplifier.enabled = true;
+  apm_config.pre_amplifier.fixed_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kNumChannels = 2;
+
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+  EXPECT_EQ(frame[100], kAudioLevel)
+      << "With factor 1, frame shouldn't be modified.";
+
+  constexpr float kGainFactor = 2.f;
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(kGainFactor));
+
+  // Process for two frames to have time to ramp up gain.
+  for (int i = 0; i < 2; ++i) {
+    frame.fill(kAudioLevel);
+    apm->ProcessStream(frame.data(), config, config, frame.data());
+  }
+  EXPECT_EQ(frame[100], kGainFactor * kAudioLevel)
+      << "Frame should be amplified.";
+}
+
+TEST(AudioProcessingImplTest,
+     LevelAdjustmentUpdateCapturePreGainRuntimeSetting) {
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting().Create();
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.capture_level_adjustment.enabled = true;
+  apm_config.capture_level_adjustment.pre_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kNumChannels = 2;
+
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+  EXPECT_EQ(frame[100], kAudioLevel)
+      << "With factor 1, frame shouldn't be modified.";
+
+  constexpr float kGainFactor = 2.f;
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(kGainFactor));
+
+  // Process for two frames to have time to ramp up gain.
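+  // (The new gain is ramped in rather than applied instantaneously, so the
+  // full factor is only asserted after the second 10 ms frame.)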
+  for (int i = 0; i < 2; ++i) {
+    frame.fill(kAudioLevel);
+    apm->ProcessStream(frame.data(), config, config, frame.data());
+  }
+  EXPECT_EQ(frame[100], kGainFactor * kAudioLevel)
+      << "Frame should be amplified.";
+}
+
+TEST(AudioProcessingImplTest,
+     LevelAdjustmentUpdateCapturePostGainRuntimeSetting) {
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting().Create();
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.capture_level_adjustment.enabled = true;
+  apm_config.capture_level_adjustment.post_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kNumChannels = 2;
+
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+  EXPECT_EQ(frame[100], kAudioLevel)
+      << "With factor 1, frame shouldn't be modified.";
+
+  constexpr float kGainFactor = 2.f;
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePostGain(kGainFactor));
+
+  // Process for two frames to have time to ramp up gain.
+  for (int i = 0; i < 2; ++i) {
+    frame.fill(kAudioLevel);
+    apm->ProcessStream(frame.data(), config, config, frame.data());
+  }
+  EXPECT_EQ(frame[100], kGainFactor * kAudioLevel)
+      << "Frame should be amplified.";
+}
+
+TEST(AudioProcessingImplTest, EchoControllerObservesSetCaptureUsageChange) {
+  // Tests that the echo controller observes that the capture usage has been
+  // updated.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const MockEchoControlFactory* echo_control_factory_ptr =
+      echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  // Ensure that SetCaptureOutputUsage is not called when no runtime settings
+  // are passed.
+  EXPECT_CALL(*echo_control_mock, SetCaptureOutputUsage(testing::_)).Times(0);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  // Ensure that SetCaptureOutputUsage is called with the right information
+  // when a runtime setting is passed.
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/false))
+      .Times(1);
+  EXPECT_TRUE(apm->PostRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+          /*capture_output_used=*/false)));
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/true))
+      .Times(1);
+  EXPECT_TRUE(apm->PostRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+          /*capture_output_used=*/true)));
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  // The number of positions to place items in the queue is equal to the queue
+  // size minus 1.
+  constexpr int kNumSlotsInQueue = RuntimeSettingQueueSize();
+
+  // Ensure that SetCaptureOutputUsage is called with the right information
+  // when many runtime settings are passed.
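+  // Fill all but one slot of the settings queue and verify that each posted
+  // setting still reaches the echo controller on the next ProcessStream().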
+  for (int k = 0; k < kNumSlotsInQueue - 1; ++k) {
+    EXPECT_TRUE(apm->PostRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+            /*capture_output_used=*/false)));
+  }
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/false))
+      .Times(kNumSlotsInQueue - 1);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  // Ensure that SetCaptureOutputUsage is properly called with the fallback
+  // value when the runtime settings queue becomes full.
+  for (int k = 0; k < kNumSlotsInQueue; ++k) {
+    EXPECT_TRUE(apm->PostRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+            /*capture_output_used=*/false)));
+  }
+  EXPECT_FALSE(apm->PostRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+          /*capture_output_used=*/false)));
+  EXPECT_FALSE(apm->PostRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+          /*capture_output_used=*/false)));
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/false))
+      .Times(kNumSlotsInQueue);
+  EXPECT_CALL(*echo_control_mock,
+              SetCaptureOutputUsage(/*capture_output_used=*/true))
+      .Times(1);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+}
+
+TEST(AudioProcessingImplTest,
+     EchoControllerObservesPreAmplifierEchoPathGainChange) {
+  // Tests that the echo controller observes an echo path gain change when the
+  // pre-amplifier submodule changes the gain.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const auto* echo_control_factory_ptr = echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+  // Disable AGC.
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = false;
+  apm_config.pre_amplifier.enabled = true;
+  apm_config.pre_amplifier.fixed_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kSampleRateHz = 48000;
+  constexpr size_t kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/true))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.f));
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+}
+
+TEST(AudioProcessingImplTest,
+     EchoControllerObservesLevelAdjustmentPreGainEchoPathGainChange) {
+  // Tests that the echo controller observes an echo path gain change when the
+  // pre-amplifier submodule changes the gain.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const auto* echo_control_factory_ptr = echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+  // Disable AGC.
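+  // With both gain controllers disabled, the level-adjustment pre gain set
+  // below is the only source of echo path gain changes that the echo
+  // controller can observe.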
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = false;
+  apm_config.capture_level_adjustment.enabled = true;
+  apm_config.capture_level_adjustment.pre_gain_factor = 1.f;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kSampleRateHz = 48000;
+  constexpr size_t kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/true))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.f));
+  apm->ProcessStream(frame.data(), config, config, frame.data());
+}
+
+TEST(AudioProcessingImplTest,
+     EchoControllerObservesAnalogAgc1EchoPathGainChange) {
+  // Tests that the echo controller observes an echo path gain change when the
+  // AGC1 analog adaptive submodule changes the analog gain.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const auto* echo_control_factory_ptr = echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+  webrtc::AudioProcessing::Config apm_config;
+  // Enable AGC1.
+  apm_config.gain_controller1.enabled = true;
+  apm_config.gain_controller1.analog_gain_controller.enabled = true;
+  apm_config.gain_controller2.enabled = false;
+  apm_config.pre_amplifier.enabled = false;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 1000;
+  constexpr size_t kSampleRateHz = 48000;
+  constexpr size_t kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  constexpr int kInitialStreamAnalogLevel = 123;
+  apm->set_stream_analog_level(kInitialStreamAnalogLevel);
+
+  // When the first frame is processed, no echo path gain change must be
+  // detected.
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+
+  // Simulate the application of the recommended analog level.
+  int recommended_analog_level = apm->recommended_stream_analog_level();
+  if (recommended_analog_level == kInitialStreamAnalogLevel) {
+    // Force an analog gain change if it did not happen.
+    recommended_analog_level++;
+  }
+  apm->set_stream_analog_level(recommended_analog_level);
+
+  // After the first frame and with a stream analog level change, the echo
+  // path gain change must be detected.
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/true))
+      .Times(1);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+}
+
+// Tests that a stream is successfully processed when AGC2 adaptive digital is
+// used and when the field trial
+// `WebRTC-Audio-TransientSuppressorVadMode/Enabled-Default/` is set.
+TEST(AudioProcessingImplTest,
+     ProcessWithAgc2AndTransientSuppressorVadModeDefault) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-TransientSuppressorVadMode/Enabled-Default/");
+  auto apm = AudioProcessingBuilder()
+                 .SetConfig({.gain_controller1{.enabled = false}})
+                 .Create();
+  ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = true;
+  apm_config.gain_controller2.adaptive_digital.enabled = true;
+  apm_config.transient_suppression.enabled = true;
+  apm->ApplyConfig(apm_config);
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(/*sample_rate_hz=*/kSampleRateHz,
+                             /*num_channels=*/kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+}
+
+// Tests that a stream is successfully processed when AGC2 adaptive digital is
+// used and when the field trial
+// `WebRTC-Audio-TransientSuppressorVadMode/Enabled-RnnVad/` is set.
+TEST(AudioProcessingImplTest,
+     ProcessWithAgc2AndTransientSuppressorVadModeRnnVad) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-TransientSuppressorVadMode/Enabled-RnnVad/");
+  rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
+  ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = true;
+  apm_config.gain_controller2.adaptive_digital.enabled = true;
+  apm_config.transient_suppression.enabled = true;
+  apm->ApplyConfig(apm_config);
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(/*sample_rate_hz=*/kSampleRateHz,
+                             /*num_channels=*/kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+}
+
+TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
+  // Tests that the echo controller observes an echo path gain change when a
+  // playout volume change is reported.
+  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
+  const auto* echo_control_factory_ptr = echo_control_factory.get();
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+  // Disable AGC.
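+  // With AGC disabled, only the playout volume changes posted below can be
+  // reported to the echo controller as echo path changes.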
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.gain_controller1.enabled = false;
+  apm_config.gain_controller2.enabled = false;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 10000;
+  constexpr size_t kSampleRateHz = 48000;
+  constexpr size_t kNumChannels = 2;
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  frame.fill(kAudioLevel);
+
+  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+
+  EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
+  EXPECT_CALL(*echo_control_mock,
+              ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/true))
+      .Times(1);
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(100));
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
+}
+
+TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
+  // Make sure that signal changes caused by a render pre-processing sub-module
+  // take place before any echo detector analysis.
+  auto test_echo_detector = rtc::make_ref_counted<TestEchoDetector>();
+  std::unique_ptr<CustomProcessing> test_render_pre_processor(
+      new TestRenderPreProcessor());
+  // Create APM injecting the test echo detector and render pre-processor.
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoDetector(test_echo_detector)
+          .SetRenderPreProcessing(std::move(test_render_pre_processor))
+          .Create();
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.pre_amplifier.enabled = true;
+  apm->ApplyConfig(apm_config);
+
+  constexpr int16_t kAudioLevel = 1000;
+  constexpr int kSampleRateHz = 16000;
+  constexpr size_t kNumChannels = 1;
+  // Explicitly initialize APM to ensure no render frames are discarded.
+  const ProcessingConfig processing_config = {{
+      {kSampleRateHz, kNumChannels},
+      {kSampleRateHz, kNumChannels},
+      {kSampleRateHz, kNumChannels},
+      {kSampleRateHz, kNumChannels},
+  }};
+  apm->Initialize(processing_config);
+
+  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+
+  constexpr float kAudioLevelFloat = static_cast<float>(kAudioLevel);
+  constexpr float kExpectedPreprocessedAudioLevel =
+      TestRenderPreProcessor::ProcessSample(kAudioLevelFloat);
+  ASSERT_NE(kAudioLevelFloat, kExpectedPreprocessedAudioLevel);
+
+  // Analyze a render stream frame.
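+  // (TestRenderPreProcessor doubles every sample, so the echo detector must
+  // observe 2 * kAudioLevel as the first sample of the render frame.)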
+  frame.fill(kAudioLevel);
+  ASSERT_EQ(AudioProcessing::Error::kNoError,
+            apm->ProcessReverseStream(frame.data(), stream_config,
+                                      stream_config, frame.data()));
+  // Trigger a call to EchoDetector::AnalyzeRenderAudio() via
+  // ProcessStream().
+  frame.fill(kAudioLevel);
+  ASSERT_EQ(AudioProcessing::Error::kNoError,
+            apm->ProcessStream(frame.data(), stream_config, stream_config,
+                               frame.data()));
+  // Regardless of how the call to EchoDetector::AnalyzeRenderAudio() is
+  // triggered, the line below checks that the call has occurred. If not, the
+  // APM implementation may have changed and this test might need to be
+  // adapted.
+  ASSERT_TRUE(test_echo_detector->analyze_render_audio_called());
+  // Check that the data read in EchoDetector::AnalyzeRenderAudio() is that
+  // produced by the render pre-processor.
+  EXPECT_EQ(kExpectedPreprocessedAudioLevel,
+            test_echo_detector->last_render_audio_first_sample());
+}
+
+// Disabling build-optional submodules and trying to enable them via the APM
+// config should be bit-exact with running APM with said submodules disabled.
+// This mainly tests that SetCreateOptionalSubmodulesForTesting has an effect.
+TEST(ApmWithSubmodulesExcludedTest, BitexactWithDisabledModules) {
+  auto apm = rtc::make_ref_counted<AudioProcessingImpl>();
+  ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
+
+  ApmSubmoduleCreationOverrides overrides;
+  overrides.transient_suppression = true;
+  apm->OverrideSubmoduleCreationForTesting(overrides);
+
+  AudioProcessing::Config apm_config = apm->GetConfig();
+  apm_config.transient_suppression.enabled = true;
+  apm->ApplyConfig(apm_config);
+
+  rtc::scoped_refptr<AudioProcessing> apm_reference =
+      AudioProcessingBuilder().Create();
+  apm_config = apm_reference->GetConfig();
+  apm_config.transient_suppression.enabled = false;
+  apm_reference->ApplyConfig(apm_config);
+
+  constexpr int kSampleRateHz = 16000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  std::array<float, kSampleRateHz / 100> buffer_reference;
+  float* channel_pointers[] = {buffer.data()};
+  float* channel_pointers_reference[] = {buffer_reference.data()};
+  StreamConfig stream_config(/*sample_rate_hz=*/kSampleRateHz,
+                             /*num_channels=*/kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcessPerConfiguration = 10;
+
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    std::copy(buffer.begin(), buffer.end(), buffer_reference.begin());
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    ASSERT_EQ(
+        apm_reference->ProcessStream(channel_pointers_reference, stream_config,
+                                     stream_config, channel_pointers_reference),
+        kNoErr);
+    for (int j = 0; j < kSampleRateHz / 100; ++j) {
+      EXPECT_EQ(buffer[j], buffer_reference[j]);
+    }
+  }
+}
+
+// Disable transient suppressor creation and run APM in ways that should
+// trigger calls to the transient suppressor API.
+TEST(ApmWithSubmodulesExcludedTest, ReinitializeTransientSuppressor) {
+  auto apm = rtc::make_ref_counted<AudioProcessingImpl>();
+  ASSERT_EQ(apm->Initialize(), kNoErr);
+
+  ApmSubmoduleCreationOverrides overrides;
+  overrides.transient_suppression = true;
+  apm->OverrideSubmoduleCreationForTesting(overrides);
+
+  AudioProcessing::Config config = apm->GetConfig();
+  config.transient_suppression.enabled = true;
+  apm->ApplyConfig(config);
+  // 960 samples per frame: 10 ms of <= 48 kHz audio with <= 2 channels.
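+  // At 48 kHz, a 10 ms frame holds 480 samples per channel; channel 1 starts
+  // at offset 480, so both channels fit in the single 960-float buffer.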
+  float buffer[960];
+  float* channel_pointers[] = {&buffer[0], &buffer[480]};
+  Random random_generator(2341U);
+  constexpr int kFramesToProcessPerConfiguration = 3;
+
+  StreamConfig initial_stream_config(/*sample_rate_hz=*/16000,
+                                     /*num_channels=*/1);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, initial_stream_config,
+                                 initial_stream_config, channel_pointers),
+              kNoErr);
+  }
+
+  StreamConfig stereo_stream_config(/*sample_rate_hz=*/16000,
+                                    /*num_channels=*/2);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, stereo_stream_config,
+                                 stereo_stream_config, channel_pointers),
+              kNoErr);
+  }
+
+  StreamConfig high_sample_rate_stream_config(/*sample_rate_hz=*/48000,
+                                              /*num_channels=*/2);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(
+        apm->ProcessStream(channel_pointers, high_sample_rate_stream_config,
+                           high_sample_rate_stream_config, channel_pointers),
+        kNoErr);
+  }
+}
+
+// Disable transient suppressor creation and run APM in ways that should
+// trigger calls to the transient suppressor API.
+TEST(ApmWithSubmodulesExcludedTest, ToggleTransientSuppressor) {
+  auto apm = rtc::make_ref_counted<AudioProcessingImpl>();
+  ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
+
+  ApmSubmoduleCreationOverrides overrides;
+  overrides.transient_suppression = true;
+  apm->OverrideSubmoduleCreationForTesting(overrides);
+
+  // 960 samples per frame: 10 ms of <= 48 kHz audio with <= 2 channels.
+  float buffer[960];
+  float* channel_pointers[] = {&buffer[0], &buffer[480]};
+  Random random_generator(2341U);
+  constexpr int kFramesToProcessPerConfiguration = 3;
+  StreamConfig stream_config(/*sample_rate_hz=*/16000,
+                             /*num_channels=*/1);
+
+  AudioProcessing::Config config = apm->GetConfig();
+  config.transient_suppression.enabled = true;
+  apm->ApplyConfig(config);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+
+  config = apm->GetConfig();
+  config.transient_suppression.enabled = false;
+  apm->ApplyConfig(config);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+
+  config = apm->GetConfig();
+  config.transient_suppression.enabled = true;
+  apm->ApplyConfig(config);
+  for (int i = 0; i < kFramesToProcessPerConfiguration; ++i) {
+    RandomizeSampleVector(&random_generator, buffer);
+    EXPECT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+  }
+}
+
+class StartupInputVolumeParameterizedTest
+    : public ::testing::TestWithParam<int> {};
+
+// Tests that, when no input volume controller is used, the startup input
+// volume is never modified.
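+// (ProcessInputVolume() applies the volume, processes one frame and returns
+// the volume that APM recommends afterwards.)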
+TEST_P(StartupInputVolumeParameterizedTest,
+       WithNoInputVolumeControllerStartupVolumeNotModified) {
+  webrtc::AudioProcessing::Config config;
+  config.gain_controller1.enabled = false;
+  config.gain_controller2.enabled = false;
+  auto apm = AudioProcessingBuilder().SetConfig(config).Create();
+
+  int startup_volume = GetParam();
+  int recommended_volume = ProcessInputVolume(
+      *apm, /*num_frames=*/1, /*initial_volume=*/startup_volume);
+  EXPECT_EQ(recommended_volume, startup_volume);
+}
+
+INSTANTIATE_TEST_SUITE_P(AudioProcessingImplTest,
+                         StartupInputVolumeParameterizedTest,
+                         ::testing::Values(0, 5, 15, 50, 100));
+
+// Tests that, when no input volume controller is used, the recommended input
+// volume always matches the applied one.
+TEST(AudioProcessingImplTest,
+     WithNoInputVolumeControllerAppliedAndRecommendedVolumesMatch) {
+  webrtc::AudioProcessing::Config config;
+  config.gain_controller1.enabled = false;
+  config.gain_controller2.enabled = false;
+  auto apm = AudioProcessingBuilder().SetConfig(config).Create();
+
+  Random rand_gen(42);
+  for (int i = 0; i < 32; ++i) {
+    SCOPED_TRACE(i);
+    int32_t applied_volume = rand_gen.Rand(/*low=*/0, /*high=*/255);
+    int recommended_volume =
+        ProcessInputVolume(*apm, /*num_frames=*/1, applied_volume);
+    EXPECT_EQ(recommended_volume, applied_volume);
+  }
+}
+
+class ApmInputVolumeControllerParametrizedTest
+    : public ::testing::TestWithParam<
+          std::tuple<int, int, AudioProcessing::Config>> {
+ protected:
+  ApmInputVolumeControllerParametrizedTest()
+      : sample_rate_hz_(std::get<0>(GetParam())),
+        num_channels_(std::get<1>(GetParam())),
+        channels_(num_channels_),
+        channel_pointers_(num_channels_) {
+    const int frame_size = sample_rate_hz_ / 100;
+    for (int c = 0; c < num_channels_; ++c) {
+      channels_[c].resize(frame_size);
+      channel_pointers_[c] = channels_[c].data();
+      std::fill(channels_[c].begin(), channels_[c].end(), 0.0f);
+    }
+  }
+
+  int sample_rate_hz() const { return sample_rate_hz_; }
+  int num_channels() const { return num_channels_; }
+  AudioProcessing::Config GetConfig() const { return std::get<2>(GetParam()); }
+
+  float* const* channel_pointers() { return channel_pointers_.data(); }
+
+ private:
+  const int sample_rate_hz_;
+  const int num_channels_;
+  std::vector<std::vector<float>> channels_;
+  std::vector<float*> channel_pointers_;
+};
+
+TEST_P(ApmInputVolumeControllerParametrizedTest,
+       EnforceMinInputVolumeAtStartupWithZeroVolume) {
+  const StreamConfig stream_config(sample_rate_hz(), num_channels());
+  auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create();
+
+  apm->set_stream_analog_level(0);
+  apm->ProcessStream(channel_pointers(), stream_config, stream_config,
+                     channel_pointers());
+  EXPECT_GT(apm->recommended_stream_analog_level(), 0);
+}
+
+TEST_P(ApmInputVolumeControllerParametrizedTest,
+       EnforceMinInputVolumeAtStartupWithNonZeroVolume) {
+  const StreamConfig stream_config(sample_rate_hz(), num_channels());
+  auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create();
+
+  constexpr int kStartupVolume = 3;
+  apm->set_stream_analog_level(kStartupVolume);
+  apm->ProcessStream(channel_pointers(), stream_config, stream_config,
+                     channel_pointers());
+  EXPECT_GT(apm->recommended_stream_analog_level(), kStartupVolume);
+}
+
+TEST_P(ApmInputVolumeControllerParametrizedTest,
+       EnforceMinInputVolumeAfterManualVolumeAdjustment) {
+  const auto config = GetConfig();
+  if (config.gain_controller1.enabled) {
+    // After a downward manual adjustment, AGC1 slowly converges to the minimum
+    // input volume.
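+    // A single processed frame is therefore not enough to observe the
+    // enforced minimum here.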
+    GTEST_SKIP() << "Does not apply to AGC1";
+  }
+  const StreamConfig stream_config(sample_rate_hz(), num_channels());
+  auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create();
+
+  apm->set_stream_analog_level(20);
+  apm->ProcessStream(channel_pointers(), stream_config, stream_config,
+                     channel_pointers());
+  constexpr int kManuallyAdjustedVolume = 3;
+  apm->set_stream_analog_level(kManuallyAdjustedVolume);
+  apm->ProcessStream(channel_pointers(), stream_config, stream_config,
+                     channel_pointers());
+  EXPECT_GT(apm->recommended_stream_analog_level(), kManuallyAdjustedVolume);
+}
+
+TEST_P(ApmInputVolumeControllerParametrizedTest,
+       DoNotEnforceMinInputVolumeAtStartupWithHighVolume) {
+  const StreamConfig stream_config(sample_rate_hz(), num_channels());
+  auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create();
+
+  constexpr int kStartupVolume = 200;
+  apm->set_stream_analog_level(kStartupVolume);
+  apm->ProcessStream(channel_pointers(), stream_config, stream_config,
+                     channel_pointers());
+  EXPECT_EQ(apm->recommended_stream_analog_level(), kStartupVolume);
+}
+
+TEST_P(ApmInputVolumeControllerParametrizedTest,
+       DoNotEnforceMinInputVolumeAfterManualVolumeAdjustmentToZero) {
+  const StreamConfig stream_config(sample_rate_hz(), num_channels());
+  auto apm = AudioProcessingBuilder().SetConfig(GetConfig()).Create();
+
+  apm->set_stream_analog_level(100);
+  apm->ProcessStream(channel_pointers(), stream_config, stream_config,
+                     channel_pointers());
+  apm->set_stream_analog_level(0);
+  apm->ProcessStream(channel_pointers(), stream_config, stream_config,
+                     channel_pointers());
+  EXPECT_EQ(apm->recommended_stream_analog_level(), 0);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AudioProcessingImplTest,
+    ApmInputVolumeControllerParametrizedTest,
+    ::testing::Combine(
+        ::testing::Values(8000, 16000, 32000, 48000),  // Sample rates.
+        ::testing::Values(1, 2),                       // Number of channels.
+        ::testing::Values(
+            // Full AGC1.
+            AudioProcessing::Config{
+                .gain_controller1 = {.enabled = true,
+                                     .analog_gain_controller =
+                                         {.enabled = true,
+                                          .enable_digital_adaptive = true}},
+                .gain_controller2 = {.enabled = false}},
+            // Hybrid AGC.
+            AudioProcessing::Config{
+                .gain_controller1 = {.enabled = true,
+                                     .analog_gain_controller =
+                                         {.enabled = true,
+                                          .enable_digital_adaptive = false}},
+                .gain_controller2 = {.enabled = true,
+                                     .adaptive_digital = {.enabled =
+                                                              true}}})));
+
+// When the input volume is not emulated and no input volume controller is
+// active, the recommended volume must always be the applied volume.
+TEST(AudioProcessingImplTest,
+     RecommendAppliedInputVolumeWithNoAgcWithNoEmulation) {
+  auto apm = AudioProcessingBuilder()
+                 .SetConfig({.capture_level_adjustment = {.enabled = false},
+                             .gain_controller1 = {.enabled = false}})
+                 .Create();
+
+  constexpr int kOneFrame = 1;
+  EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/123), 123);
+  EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/59), 59);
+  EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135);
+}
+
+// When the input volume is emulated, the recommended volume must always be
+// the applied volume and never the one set in the input volume emulator.
+// TODO(bugs.webrtc.org/14581): Enable when APM fixed to let this test pass.
+TEST(AudioProcessingImplTest, + DISABLED_RecommendAppliedInputVolumeWithNoAgcWithEmulation) { + auto apm = + AudioProcessingBuilder() + .SetConfig({.capture_level_adjustment = {.enabled = true, + .analog_mic_gain_emulation{ + .enabled = true, + .initial_level = 255}}, + .gain_controller1 = {.enabled = false}}) + .Create(); + + constexpr int kOneFrame = 1; + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/123), 123); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/59), 59); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135); +} + +// Even if there is an enabled input volume controller, when the input volume is +// emulated, the recommended volume is always the applied volume because the +// active controller must only adjust the internally emulated volume and leave +// the externally applied volume unchanged. +// TODO(bugs.webrtc.org/14581): Enable when APM fixed to let this test pass. +TEST(AudioProcessingImplTest, + DISABLED_RecommendAppliedInputVolumeWithAgcWithEmulation) { + auto apm = + AudioProcessingBuilder() + .SetConfig({.capture_level_adjustment = {.enabled = true, + .analog_mic_gain_emulation{ + .enabled = true}}, + .gain_controller1 = {.enabled = true, + .analog_gain_controller{ + .enabled = true, + }}}) + .Create(); + + constexpr int kOneFrame = 1; + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/123), 123); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/59), 59); + EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135); +} + +TEST(AudioProcessingImplTest, + Agc2FieldTrialDoNotSwitchToFullAgc2WhenNoAgcIsActive) { + constexpr AudioProcessing::Config kOriginal{ + .gain_controller1{.enabled = false}, + .gain_controller2{.enabled = false}, + }; + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_EQ(adjusted.gain_controller1, kOriginal.gain_controller1); + EXPECT_EQ(adjusted.gain_controller2, kOriginal.gain_controller2); + + // Test config application via `AudioProcessing::ApplyConfig()`. + auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(kOriginal); + adjusted = apm->GetConfig(); + EXPECT_EQ(adjusted.gain_controller1, kOriginal.gain_controller1); + EXPECT_EQ(adjusted.gain_controller2, kOriginal.gain_controller2); +} + +TEST(AudioProcessingImplTest, + Agc2FieldTrialDoNotSwitchToFullAgc2WithAgc1Agc2InputVolumeControllers) { + constexpr AudioProcessing::Config kOriginal{ + .gain_controller1{.enabled = true, + .analog_gain_controller{.enabled = true}}, + .gain_controller2{.enabled = true, + .input_volume_controller{.enabled = true}}, + }; + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig(); + EXPECT_EQ(adjusted.gain_controller1, kOriginal.gain_controller1); + EXPECT_EQ(adjusted.gain_controller2, kOriginal.gain_controller2); + + // Test config application via `AudioProcessing::ApplyConfig()`. 
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, kOriginal.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, kOriginal.gain_controller2);
+}
+
+class Agc2FieldTrialParametrizedTest
+    : public ::testing::TestWithParam<AudioProcessing::Config> {};
+
+TEST_P(Agc2FieldTrialParametrizedTest, DoNotChangeConfigIfDisabled) {
+  const AudioProcessing::Config original = GetParam();
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Disabled/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(original).Create()->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(original);
+  adjusted = apm->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, DoNotChangeConfigIfNoOverride) {
+  const AudioProcessing::Config original = GetParam();
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "switch_to_agc2:false,"
+      "disallow_transient_suppressor_usage:false/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(original).Create()->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(original);
+  adjusted = apm->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, DoNotSwitchToFullAgc2) {
+  const AudioProcessing::Config original = GetParam();
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:false/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(original).Create()->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(original);
+  adjusted = apm->GetConfig();
+  EXPECT_EQ(adjusted.gain_controller1, original.gain_controller1);
+  EXPECT_EQ(adjusted.gain_controller2, original.gain_controller2);
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, SwitchToFullAgc2) {
+  const AudioProcessing::Config original = GetParam();
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(original).Create()->GetConfig();
+  EXPECT_FALSE(adjusted.gain_controller1.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
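+  // (The same switch must also happen when the config is applied to an APM
+  // instance that has already been constructed.)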
+ auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(original); + adjusted = apm->GetConfig(); + EXPECT_FALSE(adjusted.gain_controller1.enabled); + EXPECT_TRUE(adjusted.gain_controller2.enabled); + EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled); + EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled); +} + +TEST_P(Agc2FieldTrialParametrizedTest, + SwitchToFullAgc2AndOverrideInputVolumeControllerParameters) { + const AudioProcessing::Config original = GetParam(); + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true," + "min_input_volume:123," + "clipped_level_min:20," + "clipped_level_step:30," + "clipped_ratio_threshold:0.4," + "clipped_wait_frames:50," + "enable_clipping_predictor:true," + "target_range_max_dbfs:-6," + "target_range_min_dbfs:-70," + "update_input_volume_wait_frames:80," + "speech_probability_threshold:0.9," + "speech_ratio_threshold:1.0/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(original).Create()->GetConfig(); + EXPECT_FALSE(adjusted.gain_controller1.enabled); + EXPECT_TRUE(adjusted.gain_controller2.enabled); + EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled); + EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled); + + // Test config application via `AudioProcessing::ApplyConfig()`. + auto apm = AudioProcessingBuilder().Create(); + apm->ApplyConfig(original); + adjusted = apm->GetConfig(); + EXPECT_FALSE(adjusted.gain_controller1.enabled); + EXPECT_TRUE(adjusted.gain_controller2.enabled); + EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled); + EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled); +} + +TEST_P(Agc2FieldTrialParametrizedTest, + SwitchToFullAgc2AndOverrideAdaptiveDigitalControllerParameters) { + const AudioProcessing::Config original = GetParam(); + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/Enabled,switch_to_agc2:true," + "headroom_db:10," + "max_gain_db:20," + "initial_gain_db:7," + "max_gain_change_db_per_second:5," + "max_output_noise_level_dbfs:-40/"); + + // Test config application via `AudioProcessing` ctor. + auto adjusted = + AudioProcessingBuilder().SetConfig(original).Create()->GetConfig(); + EXPECT_FALSE(adjusted.gain_controller1.enabled); + EXPECT_TRUE(adjusted.gain_controller2.enabled); + EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled); + EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled); + ASSERT_NE(adjusted.gain_controller2.adaptive_digital, + original.gain_controller2.adaptive_digital); + EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.headroom_db, 10); + EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.max_gain_db, 20); + EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.initial_gain_db, 7); + EXPECT_EQ( + adjusted.gain_controller2.adaptive_digital.max_gain_change_db_per_second, + 5); + EXPECT_EQ( + adjusted.gain_controller2.adaptive_digital.max_output_noise_level_dbfs, + -40); + + // Test config application via `AudioProcessing::ApplyConfig()`. 
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(original);
+  adjusted = apm->GetConfig();
+  EXPECT_FALSE(adjusted.gain_controller1.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.input_volume_controller.enabled);
+  EXPECT_TRUE(adjusted.gain_controller2.adaptive_digital.enabled);
+  ASSERT_NE(adjusted.gain_controller2.adaptive_digital,
+            original.gain_controller2.adaptive_digital);
+  EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.headroom_db, 10);
+  EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.max_gain_db, 20);
+  EXPECT_EQ(adjusted.gain_controller2.adaptive_digital.initial_gain_db, 7);
+  EXPECT_EQ(
+      adjusted.gain_controller2.adaptive_digital.max_gain_change_db_per_second,
+      5);
+  EXPECT_EQ(
+      adjusted.gain_controller2.adaptive_digital.max_output_noise_level_dbfs,
+      -40);
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, ProcessSucceedsWithTs) {
+  AudioProcessing::Config config = GetParam();
+  if (!config.transient_suppression.enabled) {
+    GTEST_SKIP() << "TS is disabled, skip.";
+  }
+
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Disabled/");
+  auto apm = AudioProcessingBuilder().SetConfig(config).Create();
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  int volume = 100;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    SCOPED_TRACE(i);
+    RandomizeSampleVector(&random_generator, buffer);
+    apm->set_stream_analog_level(volume);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    volume = apm->recommended_stream_analog_level();
+  }
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest, ProcessSucceedsWithoutTs) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "switch_to_agc2:false,"
+      "disallow_transient_suppressor_usage:true/");
+  auto apm = AudioProcessingBuilder().SetConfig(GetParam()).Create();
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  int volume = 100;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    SCOPED_TRACE(i);
+    RandomizeSampleVector(&random_generator, buffer);
+    apm->set_stream_analog_level(volume);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    volume = apm->recommended_stream_analog_level();
+  }
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest,
+       ProcessSucceedsWhenSwitchToFullAgc2WithTs) {
+  AudioProcessing::Config config = GetParam();
+  if (!config.transient_suppression.enabled) {
+    GTEST_SKIP() << "TS is disabled, skip.";
+  }
+
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "switch_to_agc2:true,"
+      "disallow_transient_suppressor_usage:false/");
+  auto apm = AudioProcessingBuilder().SetConfig(config).Create();
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  int volume = 100;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    SCOPED_TRACE(i);
+    RandomizeSampleVector(&random_generator, buffer);
+    apm->set_stream_analog_level(volume);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    volume = apm->recommended_stream_analog_level();
+  }
+}
+
+TEST_P(Agc2FieldTrialParametrizedTest,
+       ProcessSucceedsWhenSwitchToFullAgc2WithoutTs) {
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "switch_to_agc2:true,"
+      "disallow_transient_suppressor_usage:true/");
+  auto apm = AudioProcessingBuilder().SetConfig(GetParam()).Create();
+
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  std::array<float, kSampleRateHz / 100> buffer;
+  float* channel_pointers[] = {buffer.data()};
+  StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  Random random_generator(2341U);
+  constexpr int kFramesToProcess = 10;
+  int volume = 100;
+  for (int i = 0; i < kFramesToProcess; ++i) {
+    SCOPED_TRACE(i);
+    RandomizeSampleVector(&random_generator, buffer);
+    apm->set_stream_analog_level(volume);
+    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config,
+                                 stream_config, channel_pointers),
+              kNoErr);
+    volume = apm->recommended_stream_analog_level();
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AudioProcessingImplTest,
+    Agc2FieldTrialParametrizedTest,
+    ::testing::Values(
+        // Full AGC1, TS disabled.
+        AudioProcessing::Config{
+            .transient_suppression = {.enabled = false},
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = true}},
+            .gain_controller2 = {.enabled = false}},
+        // Full AGC1, TS enabled.
+        AudioProcessing::Config{
+            .transient_suppression = {.enabled = true},
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = true}},
+            .gain_controller2 = {.enabled = false}},
+        // Hybrid AGC, TS disabled.
+        AudioProcessing::Config{
+            .transient_suppression = {.enabled = false},
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = false}},
+            .gain_controller2 = {.enabled = true,
+                                 .adaptive_digital = {.enabled = true}}},
+        // Hybrid AGC, TS enabled.
+        AudioProcessing::Config{
+            .transient_suppression = {.enabled = true},
+            .gain_controller1 =
+                {.enabled = true,
+                 .analog_gain_controller = {.enabled = true,
+                                            .enable_digital_adaptive = false}},
+            .gain_controller2 = {.enabled = true,
+                                 .adaptive_digital = {.enabled = true}}}));
+
+TEST(AudioProcessingImplTest, CanDisableTransientSuppressor) {
+  constexpr AudioProcessing::Config kOriginal = {
+      .transient_suppression = {.enabled = false}};
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+}
+
+TEST(AudioProcessingImplTest, CanEnableTs) {
+  constexpr AudioProcessing::Config kOriginal = {
+      .transient_suppression = {.enabled = true}};
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig();
+  EXPECT_TRUE(adjusted.transient_suppression.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_TRUE(adjusted.transient_suppression.enabled);
+}
+
+TEST(AudioProcessingImplTest, CanDisableTsWithAgc2FieldTrialDisabled) {
+  constexpr AudioProcessing::Config kOriginal = {
+      .transient_suppression = {.enabled = false}};
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Disabled/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+}
+
+TEST(AudioProcessingImplTest, CanEnableTsWithAgc2FieldTrialDisabled) {
+  constexpr AudioProcessing::Config kOriginal = {
+      .transient_suppression = {.enabled = true}};
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Disabled/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig();
+  EXPECT_TRUE(adjusted.transient_suppression.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_TRUE(adjusted.transient_suppression.enabled);
+}
+
+TEST(AudioProcessingImplTest,
+     CanDisableTsWithAgc2FieldTrialEnabledAndUsageAllowed) {
+  constexpr AudioProcessing::Config kOriginal = {
+      .transient_suppression = {.enabled = false}};
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "disallow_transient_suppressor_usage:false/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+}
+
+TEST(AudioProcessingImplTest,
+     CanEnableTsWithAgc2FieldTrialEnabledAndUsageAllowed) {
+  constexpr AudioProcessing::Config kOriginal = {
+      .transient_suppression = {.enabled = true}};
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "disallow_transient_suppressor_usage:false/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig();
+  EXPECT_TRUE(adjusted.transient_suppression.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_TRUE(adjusted.transient_suppression.enabled);
+}
+
+TEST(AudioProcessingImplTest,
+     CannotEnableTsWithAgc2FieldTrialEnabledAndUsageDisallowed) {
+  constexpr AudioProcessing::Config kOriginal = {
+      .transient_suppression = {.enabled = true}};
+  webrtc::test::ScopedFieldTrials field_trials(
+      "WebRTC-Audio-GainController2/Enabled,"
+      "disallow_transient_suppressor_usage:true/");
+
+  // Test config application via `AudioProcessing` ctor.
+  auto adjusted =
+      AudioProcessingBuilder().SetConfig(kOriginal).Create()->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+
+  // Test config application via `AudioProcessing::ApplyConfig()`.
+  auto apm = AudioProcessingBuilder().Create();
+  apm->ApplyConfig(kOriginal);
+  adjusted = apm->GetConfig();
+  EXPECT_FALSE(adjusted.transient_suppression.enabled);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_performance_unittest.cc b/third_party/libwebrtc/modules/audio_processing/audio_processing_performance_unittest.cc
new file mode 100644
index 0000000000..10d3d84951
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_performance_unittest.cc
@@ -0,0 +1,568 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <math.h>
+
+#include <atomic>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "api/numerics/samples_stats_counter.h"
+#include "api/test/metrics/global_metrics_logger_and_exporter.h"
+#include "api/test/metrics/metric.h"
+#include "modules/audio_processing/audio_processing_impl.h"
+#include "modules/audio_processing/test/audio_processing_builder_for_testing.h"
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/event.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/platform_thread.h"
+#include "rtc_base/random.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::webrtc::test::GetGlobalMetricsLogger;
+using ::webrtc::test::ImprovementDirection;
+using ::webrtc::test::Metric;
+using ::webrtc::test::Unit;
+
+class CallSimulator;
+
+// Type of the render thread APM API call to use in the test.
+enum class ProcessorType { kRender, kCapture };
+
+// Variant of APM processing settings to use in the test.
+enum class SettingsType {
+  kDefaultApmDesktop,
+  kDefaultApmMobile,
+  kAllSubmodulesTurnedOff,
+  kDefaultApmDesktopWithoutDelayAgnostic,
+  kDefaultApmDesktopWithoutExtendedFilter
+};
+
+// Variables related to the audio data and formats.
+struct AudioFrameData {
+  explicit AudioFrameData(size_t max_frame_size) {
+    // Set up the two-dimensional arrays needed for the APM API calls.
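+    // Each stream is backed by one flat buffer of 2 * max_frame_size floats;
+    // channel k starts at offset k * max_frame_size.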
+    input_framechannels.resize(2 * max_frame_size);
+    input_frame.resize(2);
+    input_frame[0] = &input_framechannels[0];
+    input_frame[1] = &input_framechannels[max_frame_size];
+
+    output_frame_channels.resize(2 * max_frame_size);
+    output_frame.resize(2);
+    output_frame[0] = &output_frame_channels[0];
+    output_frame[1] = &output_frame_channels[max_frame_size];
+  }
+
+  std::vector<float> output_frame_channels;
+  std::vector<float*> output_frame;
+  std::vector<float> input_framechannels;
+  std::vector<float*> input_frame;
+  StreamConfig input_stream_config;
+  StreamConfig output_stream_config;
+};
+
+// The configuration for the test.
+struct SimulationConfig {
+  SimulationConfig(int sample_rate_hz, SettingsType simulation_settings)
+      : sample_rate_hz(sample_rate_hz),
+        simulation_settings(simulation_settings) {}
+
+  static std::vector<SimulationConfig> GenerateSimulationConfigs() {
+    std::vector<SimulationConfig> simulation_configs;
+#ifndef WEBRTC_ANDROID
+    const SettingsType desktop_settings[] = {
+        SettingsType::kDefaultApmDesktop, SettingsType::kAllSubmodulesTurnedOff,
+        SettingsType::kDefaultApmDesktopWithoutDelayAgnostic,
+        SettingsType::kDefaultApmDesktopWithoutExtendedFilter};
+
+    const int desktop_sample_rates[] = {8000, 16000, 32000, 48000};
+
+    for (auto sample_rate : desktop_sample_rates) {
+      for (auto settings : desktop_settings) {
+        simulation_configs.push_back(SimulationConfig(sample_rate, settings));
+      }
+    }
+#endif
+
+    const SettingsType mobile_settings[] = {SettingsType::kDefaultApmMobile};
+
+    const int mobile_sample_rates[] = {8000, 16000};
+
+    for (auto sample_rate : mobile_sample_rates) {
+      for (auto settings : mobile_settings) {
+        simulation_configs.push_back(SimulationConfig(sample_rate, settings));
+      }
+    }
+
+    return simulation_configs;
+  }
+
+  std::string SettingsDescription() const {
+    std::string description;
+    switch (simulation_settings) {
+      case SettingsType::kDefaultApmMobile:
+        description = "DefaultApmMobile";
+        break;
+      case SettingsType::kDefaultApmDesktop:
+        description = "DefaultApmDesktop";
+        break;
+      case SettingsType::kAllSubmodulesTurnedOff:
+        description = "AllSubmodulesOff";
+        break;
+      case SettingsType::kDefaultApmDesktopWithoutDelayAgnostic:
+        description = "DefaultApmDesktopWithoutDelayAgnostic";
+        break;
+      case SettingsType::kDefaultApmDesktopWithoutExtendedFilter:
+        description = "DefaultApmDesktopWithoutExtendedFilter";
+        break;
+    }
+    return description;
+  }
+
+  int sample_rate_hz = 16000;
+  SettingsType simulation_settings = SettingsType::kDefaultApmDesktop;
+};
+
+// Handler for the frame counters.
+class FrameCounters {
+ public:
+  void IncreaseRenderCounter() { render_count_.fetch_add(1); }
+
+  void IncreaseCaptureCounter() { capture_count_.fetch_add(1); }
+
+  int CaptureMinusRenderCounters() const {
+    // The return value will be approximate, but that's good enough since
+    // by the time we return the value, it's not guaranteed to be correct
+    // anyway.
+    return capture_count_.load(std::memory_order_acquire) -
+           render_count_.load(std::memory_order_acquire);
+  }
+
+  int RenderMinusCaptureCounters() const {
+    return -CaptureMinusRenderCounters();
+  }
+
+  bool BothCountersExceedeThreshold(int threshold) const {
+    // TODO(tommi): We could use an event to signal this so that we don't need
+    // to be polling from the main thread and possibly steal cycles.
+    const int capture_count = capture_count_.load(std::memory_order_acquire);
+    const int render_count = render_count_.load(std::memory_order_acquire);
+    return (render_count > threshold && capture_count > threshold);
+  }
+
+ private:
+  std::atomic<int> render_count_{0};
+  std::atomic<int> capture_count_{0};
+};
+
+// Class that represents a flag that can only be raised.
+class LockedFlag {
+ public:
+  bool get_flag() const { return flag_.load(std::memory_order_acquire); }
+
+  void set_flag() {
+    // Check the flag first (a read-only operation) to avoid dirtying the
+    // cache-line when the flag is already set.
+    if (!get_flag()) {
+      int zero = 0;
+      flag_.compare_exchange_strong(zero, 1);
+    }
+  }
+
+ private:
+  std::atomic<int> flag_{0};
+};
+
+// Parent class for the thread processors.
+class TimedThreadApiProcessor {
+ public:
+  TimedThreadApiProcessor(ProcessorType processor_type,
+                          Random* rand_gen,
+                          FrameCounters* shared_counters_state,
+                          LockedFlag* capture_call_checker,
+                          CallSimulator* test_framework,
+                          const SimulationConfig* simulation_config,
+                          AudioProcessing* apm,
+                          int num_durations_to_store,
+                          float input_level,
+                          int num_channels)
+      : rand_gen_(rand_gen),
+        frame_counters_(shared_counters_state),
+        capture_call_checker_(capture_call_checker),
+        test_(test_framework),
+        simulation_config_(simulation_config),
+        apm_(apm),
+        frame_data_(kMaxFrameSize),
+        clock_(webrtc::Clock::GetRealTimeClock()),
+        num_durations_to_store_(num_durations_to_store),
+        api_call_durations_(num_durations_to_store_ - kNumInitializationFrames),
+        samples_count_(0),
+        input_level_(input_level),
+        processor_type_(processor_type),
+        num_channels_(num_channels) {}
+
+  // Implements the callback functionality for the threads.
+  bool Process();
+
+  // Method for printing out the simulation statistics.
+  void print_processor_statistics(absl::string_view processor_name) const {
+    const std::string modifier = "_api_call_duration";
+
+    const std::string sample_rate_name =
+        "_" + std::to_string(simulation_config_->sample_rate_hz) + "Hz";
+
+    GetGlobalMetricsLogger()->LogMetric(
+        "apm_timing" + sample_rate_name, processor_name, api_call_durations_,
+        Unit::kMilliseconds, ImprovementDirection::kNeitherIsBetter);
+  }
+
+  void AddDuration(int64_t duration) {
+    if (samples_count_ >= kNumInitializationFrames &&
+        samples_count_ < num_durations_to_store_) {
+      api_call_durations_.AddSample(duration);
+    }
+    samples_count_++;
+  }
+
+ private:
+  static const int kMaxCallDifference = 10;
+  static const int kMaxFrameSize = 480;
+  static const int kNumInitializationFrames = 5;
+
+  int ProcessCapture() {
+    // Set the stream delay.
+    apm_->set_stream_delay_ms(30);
+
+    // Call and time the specified capture side API processing method.
+    const int64_t start_time = clock_->TimeInMicroseconds();
+    const int result = apm_->ProcessStream(
+        &frame_data_.input_frame[0], frame_data_.input_stream_config,
+        frame_data_.output_stream_config, &frame_data_.output_frame[0]);
+    const int64_t end_time = clock_->TimeInMicroseconds();
+
+    frame_counters_->IncreaseCaptureCounter();
+
+    AddDuration(end_time - start_time);
+
+    if (first_process_call_) {
+      // Flag that the capture side has been called at least once; this is
+      // needed to ensure that a capture call has been done before the first
+      // render call is performed, as implicitly required by the APM API.
+      capture_call_checker_->set_flag();
+      first_process_call_ = false;
+    }
+    return result;
+  }
+
+  bool ReadyToProcessCapture() {
+    return (frame_counters_->CaptureMinusRenderCounters() <=
+            kMaxCallDifference);
+  }
+
+  int ProcessRender() {
+    // Call and time the specified render side API processing method.
+    const int64_t start_time = clock_->TimeInMicroseconds();
+    const int result = apm_->ProcessReverseStream(
+        &frame_data_.input_frame[0], frame_data_.input_stream_config,
+        frame_data_.output_stream_config, &frame_data_.output_frame[0]);
+    const int64_t end_time = clock_->TimeInMicroseconds();
+    frame_counters_->IncreaseRenderCounter();
+
+    AddDuration(end_time - start_time);
+
+    return result;
+  }
+
+  bool ReadyToProcessRender() {
+    // Do not process until at least one capture call has been done
+    // (implicitly required by the APM API).
+    if (first_process_call_ && !capture_call_checker_->get_flag()) {
+      return false;
+    }
+
+    // Ensure that the number of render and capture calls do not differ too
+    // much.
+    if (frame_counters_->RenderMinusCaptureCounters() > kMaxCallDifference) {
+      return false;
+    }
+
+    first_process_call_ = false;
+    return true;
+  }
+
+  void PrepareFrame() {
+    // Lambda function for populating a float multichannel audio frame
+    // with random data.
+    auto populate_audio_frame = [](float amplitude, size_t num_channels,
+                                   size_t samples_per_channel, Random* rand_gen,
+                                   float** frame) {
+      for (size_t ch = 0; ch < num_channels; ch++) {
+        for (size_t k = 0; k < samples_per_channel; k++) {
+          // Store a random float with a value in [-amplitude, +amplitude].
+          frame[ch][k] = amplitude * (2 * rand_gen->Rand<float>() - 1);
+        }
+      }
+    };
+
+    // Prepare the audio input data and metadata.
+    frame_data_.input_stream_config.set_sample_rate_hz(
+        simulation_config_->sample_rate_hz);
+    frame_data_.input_stream_config.set_num_channels(num_channels_);
+    populate_audio_frame(input_level_, num_channels_,
+                         (simulation_config_->sample_rate_hz *
+                          AudioProcessing::kChunkSizeMs / 1000),
+                         rand_gen_, &frame_data_.input_frame[0]);
+
+    // Prepare the float audio output data and metadata.
+    frame_data_.output_stream_config.set_sample_rate_hz(
+        simulation_config_->sample_rate_hz);
+    frame_data_.output_stream_config.set_num_channels(1);
+  }
+
+  bool ReadyToProcess() {
+    switch (processor_type_) {
+      case ProcessorType::kRender:
+        return ReadyToProcessRender();
+
+      case ProcessorType::kCapture:
+        return ReadyToProcessCapture();
+    }
+
+    // Should not be reached, but the return statement is needed for the code
+    // to build successfully on Android.
+    RTC_DCHECK_NOTREACHED();
+    return false;
+  }
+
+  Random* rand_gen_ = nullptr;
+  FrameCounters* frame_counters_ = nullptr;
+  LockedFlag* capture_call_checker_ = nullptr;
+  CallSimulator* test_ = nullptr;
+  const SimulationConfig* const simulation_config_ = nullptr;
+  AudioProcessing* apm_ = nullptr;
+  AudioFrameData frame_data_;
+  webrtc::Clock* clock_;
+  const size_t num_durations_to_store_;
+  SamplesStatsCounter api_call_durations_;
+  size_t samples_count_ = 0;
+  const float input_level_;
+  bool first_process_call_ = true;
+  const ProcessorType processor_type_;
+  const int num_channels_ = 1;
+};
+
+// Class for managing the test simulation.
+class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
+ public:
+  CallSimulator()
+      : rand_gen_(42U),
+        simulation_config_(static_cast<SimulationConfig>(GetParam())) {}
+
+  // Run the call simulation with a timeout.
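+  // Returns false if the simulation timed out before both threads had
+  // processed the required number of frames, and true otherwise.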
+  bool Run() {
+    StartThreads();
+
+    bool result = test_complete_.Wait(kTestTimeout);
+
+    StopThreads();
+
+    render_thread_state_->print_processor_statistics(
+        simulation_config_.SettingsDescription() + "_render");
+    capture_thread_state_->print_processor_statistics(
+        simulation_config_.SettingsDescription() + "_capture");
+
+    return result;
+  }
+
+  // Tests whether all the required render and capture side calls have been
+  // done.
+  bool MaybeEndTest() {
+    if (frame_counters_.BothCountersExceedeThreshold(kMinNumFramesToProcess)) {
+      test_complete_.Set();
+      return true;
+    }
+    return false;
+  }
+
+ private:
+  static const float kCaptureInputFloatLevel;
+  static const float kRenderInputFloatLevel;
+  static const int kMinNumFramesToProcess = 150;
+  static constexpr TimeDelta kTestTimeout =
+      TimeDelta::Millis(3 * 10 * kMinNumFramesToProcess);
+
+  // Stop all running threads.
+  void StopThreads() {
+    render_thread_.Finalize();
+    capture_thread_.Finalize();
+  }
+
+  // Simulator and APM setup.
+  void SetUp() override {
+    // Lambda function for setting the default APM runtime settings for
+    // desktop.
+    auto set_default_desktop_apm_runtime_settings = [](AudioProcessing* apm) {
+      AudioProcessing::Config apm_config = apm->GetConfig();
+      apm_config.echo_canceller.enabled = true;
+      apm_config.echo_canceller.mobile_mode = false;
+      apm_config.noise_suppression.enabled = true;
+      apm_config.gain_controller1.enabled = true;
+      apm_config.gain_controller1.mode =
+          AudioProcessing::Config::GainController1::kAdaptiveDigital;
+      apm->ApplyConfig(apm_config);
+    };
+
+    // Lambda function for setting the default APM runtime settings for
+    // mobile.
+    auto set_default_mobile_apm_runtime_settings = [](AudioProcessing* apm) {
+      AudioProcessing::Config apm_config = apm->GetConfig();
+      apm_config.echo_canceller.enabled = true;
+      apm_config.echo_canceller.mobile_mode = true;
+      apm_config.noise_suppression.enabled = true;
+      apm_config.gain_controller1.mode =
+          AudioProcessing::Config::GainController1::kAdaptiveDigital;
+      apm->ApplyConfig(apm_config);
+    };
+
+    // Lambda function for turning off all of the APM runtime settings
+    // submodules.
+    auto turn_off_default_apm_runtime_settings = [](AudioProcessing* apm) {
+      AudioProcessing::Config apm_config = apm->GetConfig();
+      apm_config.echo_canceller.enabled = false;
+      apm_config.gain_controller1.enabled = false;
+      apm_config.noise_suppression.enabled = false;
+      apm->ApplyConfig(apm_config);
+    };
+
+    int num_capture_channels = 1;
+    switch (simulation_config_.simulation_settings) {
+      case SettingsType::kDefaultApmMobile: {
+        apm_ = AudioProcessingBuilderForTesting().Create();
+        ASSERT_TRUE(!!apm_);
+        set_default_mobile_apm_runtime_settings(apm_.get());
+        break;
+      }
+      case SettingsType::kDefaultApmDesktop: {
+        apm_ = AudioProcessingBuilderForTesting().Create();
+        ASSERT_TRUE(!!apm_);
+        set_default_desktop_apm_runtime_settings(apm_.get());
+        break;
+      }
+      case SettingsType::kAllSubmodulesTurnedOff: {
+        apm_ = AudioProcessingBuilderForTesting().Create();
+        ASSERT_TRUE(!!apm_);
+        turn_off_default_apm_runtime_settings(apm_.get());
+        break;
+      }
+      case SettingsType::kDefaultApmDesktopWithoutDelayAgnostic: {
+        apm_ = AudioProcessingBuilderForTesting().Create();
+        ASSERT_TRUE(!!apm_);
+        set_default_desktop_apm_runtime_settings(apm_.get());
+        break;
+      }
+      case SettingsType::kDefaultApmDesktopWithoutExtendedFilter: {
+        apm_ = AudioProcessingBuilderForTesting().Create();
+        ASSERT_TRUE(!!apm_);
+        set_default_desktop_apm_runtime_settings(apm_.get());
+        break;
+      }
+    }
+
+    render_thread_state_.reset(new TimedThreadApiProcessor(
+        ProcessorType::kRender, &rand_gen_, &frame_counters_,
+        &capture_call_checker_, this, &simulation_config_, apm_.get(),
+        kMinNumFramesToProcess, kRenderInputFloatLevel, 1));
+    capture_thread_state_.reset(new TimedThreadApiProcessor(
+        ProcessorType::kCapture, &rand_gen_, &frame_counters_,
+        &capture_call_checker_, this, &simulation_config_, apm_.get(),
+        kMinNumFramesToProcess, kCaptureInputFloatLevel, num_capture_channels));
+  }
+
+  // Start the threads used in the test.
+  void StartThreads() {
+    const auto attributes =
+        rtc::ThreadAttributes().SetPriority(rtc::ThreadPriority::kRealtime);
+    render_thread_ = rtc::PlatformThread::SpawnJoinable(
+        [this] {
+          while (render_thread_state_->Process()) {
+          }
+        },
+        "render", attributes);
+    capture_thread_ = rtc::PlatformThread::SpawnJoinable(
+        [this] {
+          while (capture_thread_state_->Process()) {
+          }
+        },
+        "capture", attributes);
+  }
+
+  // Event handler for the test.
+  rtc::Event test_complete_;
+
+  // Thread related variables.
+  Random rand_gen_;
+
+  rtc::scoped_refptr<AudioProcessing> apm_;
+  const SimulationConfig simulation_config_;
+  FrameCounters frame_counters_;
+  LockedFlag capture_call_checker_;
+  std::unique_ptr<TimedThreadApiProcessor> render_thread_state_;
+  std::unique_ptr<TimedThreadApiProcessor> capture_thread_state_;
+  rtc::PlatformThread render_thread_;
+  rtc::PlatformThread capture_thread_;
+};
+
+// Implements the callback functionality for the threads.
+bool TimedThreadApiProcessor::Process() {
+  PrepareFrame();
+
+  // Wait in a spinlock manner until it is ok to start processing.
+  // Note that SleepMs is not applicable since it only allows sleeping
+  // on a millisecond basis which is too long.
+  // TODO(tommi): This loop may affect the performance of the test that it's
+  // meant to measure. See if we could use events instead to signal readiness.
+  while (!ReadyToProcess()) {
+  }
+
+  int result = AudioProcessing::kNoError;
+  switch (processor_type_) {
+    case ProcessorType::kRender:
+      result = ProcessRender();
+      break;
+    case ProcessorType::kCapture:
+      result = ProcessCapture();
+      break;
+  }
+
+  EXPECT_EQ(result, AudioProcessing::kNoError);
+
+  return !test_->MaybeEndTest();
+}
+
+const float CallSimulator::kRenderInputFloatLevel = 0.5f;
+const float CallSimulator::kCaptureInputFloatLevel = 0.03125f;
+}  // anonymous namespace
+
+// TODO(peah): Reactivate once issue 7712 has been resolved.
+TEST_P(CallSimulator, DISABLED_ApiCallDurationTest) {
+  // Run test and verify that it did not time out.
+  EXPECT_TRUE(Run());
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AudioProcessingPerformanceTest,
+    CallSimulator,
+    ::testing::ValuesIn(SimulationConfig::GenerateSimulationConfigs()));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build
new file mode 100644
index 0000000000..f0af20c335
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build
@@ -0,0 +1,217 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
= "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("audio_processing_statistics_gn") diff --git 
new file mode 100644
index 0000000000..e320e71405
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/audio_processing_unittest.cc
@@ -0,0 +1,3441 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/include/audio_processing.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <memory>
+#include <numeric>
+#include <queue>
+#include <string>
+
+#include "absl/flags/flag.h"
+#include "absl/strings/string_view.h"
+#include "api/audio/echo_detector_creator.h"
+#include "api/make_ref_counted.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/resampler/include/push_resampler.h"
+#include "common_audio/resampler/push_sinc_resampler.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_processing/aec_dump/aec_dump_factory.h"
+#include "modules/audio_processing/audio_processing_impl.h"
+#include "modules/audio_processing/include/mock_audio_processing.h"
+#include "modules/audio_processing/test/audio_processing_builder_for_testing.h"
+#include "modules/audio_processing/test/protobuf_utils.h"
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/fake_clock.h"
+#include "rtc_base/gtest_prod_util.h"
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "rtc_base/protobuf_utils.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/swap_queue.h"
+#include "rtc_base/system/arch.h"
+#include "rtc_base/task_queue_for_test.h"
+#include "rtc_base/thread.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+RTC_PUSH_IGNORING_WUNDEF()
+#include "modules/audio_processing/debug.pb.h"
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
+#include "external/webrtc/webrtc/modules/audio_processing/test/unittest.pb.h"
+#else
+#include "modules/audio_processing/test/unittest.pb.h"
+#endif
+RTC_POP_IGNORING_WUNDEF()
+
+ABSL_FLAG(bool,
+          write_apm_ref_data,
+          false,
+          "Write ApmTest.Process results to file, instead of comparing results "
+          "to the existing reference data file.");
+
+namespace webrtc {
+namespace {
+
+// All sample rates used by APM internally during processing. Other input /
+// output rates are resampled to / from one of these.
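+// For example, a 44100 Hz capture stream is processed at 48000 Hz internally
+// and the result is resampled back to 44100 Hz on output.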
+const int kProcessSampleRates[] = {16000, 32000, 48000};
+
+enum StreamDirection { kForward = 0, kReverse };
+
+void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) {
+  ChannelBuffer<int16_t> cb_int(cb->num_frames(), cb->num_channels());
+  Deinterleave(int_data, cb->num_frames(), cb->num_channels(),
+               cb_int.channels());
+  for (size_t i = 0; i < cb->num_channels(); ++i) {
+    S16ToFloat(cb_int.channels()[i], cb->num_frames(), cb->channels()[i]);
+  }
+}
+
+void ConvertToFloat(const Int16FrameData& frame, ChannelBuffer<float>* cb) {
+  ConvertToFloat(frame.data.data(), cb);
+}
+
+void MixStereoToMono(const float* stereo,
+                     float* mono,
+                     size_t samples_per_channel) {
+  for (size_t i = 0; i < samples_per_channel; ++i)
+    mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) / 2;
+}
+
+void MixStereoToMono(const int16_t* stereo,
+                     int16_t* mono,
+                     size_t samples_per_channel) {
+  for (size_t i = 0; i < samples_per_channel; ++i)
+    mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) >> 1;
+}
+
+void CopyLeftToRightChannel(int16_t* stereo, size_t samples_per_channel) {
+  for (size_t i = 0; i < samples_per_channel; i++) {
+    stereo[i * 2 + 1] = stereo[i * 2];
+  }
+}
+
+void VerifyChannelsAreEqual(const int16_t* stereo, size_t samples_per_channel) {
+  for (size_t i = 0; i < samples_per_channel; i++) {
+    EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]);
+  }
+}
+
+void SetFrameTo(Int16FrameData* frame, int16_t value) {
+  for (size_t i = 0; i < frame->samples_per_channel * frame->num_channels;
+       ++i) {
+    frame->data[i] = value;
+  }
+}
+
+void SetFrameTo(Int16FrameData* frame, int16_t left, int16_t right) {
+  ASSERT_EQ(2u, frame->num_channels);
+  for (size_t i = 0; i < frame->samples_per_channel * 2; i += 2) {
+    frame->data[i] = left;
+    frame->data[i + 1] = right;
+  }
+}
+
+void ScaleFrame(Int16FrameData* frame, float scale) {
+  for (size_t i = 0; i < frame->samples_per_channel * frame->num_channels;
+       ++i) {
+    frame->data[i] = FloatS16ToS16(frame->data[i] * scale);
+  }
+}
+
+bool FrameDataAreEqual(const Int16FrameData& frame1,
+                       const Int16FrameData& frame2) {
+  if (frame1.samples_per_channel != frame2.samples_per_channel) {
+    return false;
+  }
+  if (frame1.num_channels != frame2.num_channels) {
+    return false;
+  }
+  if (memcmp(
+          frame1.data.data(), frame2.data.data(),
+          frame1.samples_per_channel * frame1.num_channels * sizeof(int16_t))) {
+    return false;
+  }
+  return true;
+}
+
+rtc::ArrayView<int16_t> GetMutableFrameData(Int16FrameData* frame) {
+  int16_t* ptr = frame->data.data();
+  const size_t len = frame->samples_per_channel * frame->num_channels;
+  return rtc::ArrayView<int16_t>(ptr, len);
+}
+
+rtc::ArrayView<const int16_t> GetFrameData(const Int16FrameData& frame) {
+  const int16_t* ptr = frame.data.data();
+  const size_t len = frame.samples_per_channel * frame.num_channels;
+  return rtc::ArrayView<const int16_t>(ptr, len);
+}
+
+void EnableAllAPComponents(AudioProcessing* ap) {
+  AudioProcessing::Config apm_config = ap->GetConfig();
+  apm_config.echo_canceller.enabled = true;
+#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+  apm_config.echo_canceller.mobile_mode = true;
+
+  apm_config.gain_controller1.enabled = true;
+  apm_config.gain_controller1.mode =
+      AudioProcessing::Config::GainController1::kAdaptiveDigital;
+#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+  apm_config.echo_canceller.mobile_mode = false;
+
+  apm_config.gain_controller1.enabled = true;
+  apm_config.gain_controller1.mode =
+      AudioProcessing::Config::GainController1::kAdaptiveAnalog;
+#endif
+
+  apm_config.noise_suppression.enabled = true;
+
+  apm_config.high_pass_filter.enabled = true;
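+  // Allow the internal processing pipeline to run at rates up to 48 kHz.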
+  apm_config.pipeline.maximum_internal_processing_rate = 48000;
+  ap->ApplyConfig(apm_config);
+}
+
+// These functions are only used by ApmTest.Process.
+template <typename T>
+T AbsValue(T a) {
+  return a > 0 ? a : -a;
+}
+
+int16_t MaxAudioFrame(const Int16FrameData& frame) {
+  const size_t length = frame.samples_per_channel * frame.num_channels;
+  int16_t max_data = AbsValue(frame.data[0]);
+  for (size_t i = 1; i < length; i++) {
+    max_data = std::max(max_data, AbsValue(frame.data[i]));
+  }
+
+  return max_data;
+}
+
+void OpenFileAndWriteMessage(absl::string_view filename,
+                             const MessageLite& msg) {
+  FILE* file = fopen(std::string(filename).c_str(), "wb");
+  ASSERT_TRUE(file != NULL);
+
+  int32_t size = rtc::checked_cast<int32_t>(msg.ByteSizeLong());
+  ASSERT_GT(size, 0);
+  std::unique_ptr<uint8_t[]> array(new uint8_t[size]);
+  ASSERT_TRUE(msg.SerializeToArray(array.get(), size));
+
+  ASSERT_EQ(1u, fwrite(&size, sizeof(size), 1, file));
+  ASSERT_EQ(static_cast<size_t>(size),
+            fwrite(array.get(), sizeof(array[0]), size, file));
+  fclose(file);
+}
+
+std::string ResourceFilePath(absl::string_view name, int sample_rate_hz) {
+  rtc::StringBuilder ss;
+  // Resource files are all stereo.
+  ss << name << sample_rate_hz / 1000 << "_stereo";
+  return test::ResourcePath(ss.str(), "pcm");
+}
+
+// Temporary filenames unique to this process. Used to be able to run these
+// tests in parallel: since each process needs to run in isolation, the
+// processes can't have competing filenames.
+std::map<std::string, std::string> temp_filenames;
+
+std::string OutputFilePath(absl::string_view name,
+                           int input_rate,
+                           int output_rate,
+                           int reverse_input_rate,
+                           int reverse_output_rate,
+                           size_t num_input_channels,
+                           size_t num_output_channels,
+                           size_t num_reverse_input_channels,
+                           size_t num_reverse_output_channels,
+                           StreamDirection file_direction) {
+  rtc::StringBuilder ss;
+  ss << name << "_i" << num_input_channels << "_" << input_rate / 1000 << "_ir"
+     << num_reverse_input_channels << "_" << reverse_input_rate / 1000 << "_";
+  if (num_output_channels == 1) {
+    ss << "mono";
+  } else if (num_output_channels == 2) {
+    ss << "stereo";
+  } else {
+    RTC_DCHECK_NOTREACHED();
+  }
+  ss << output_rate / 1000;
+  if (num_reverse_output_channels == 1) {
+    ss << "_rmono";
+  } else if (num_reverse_output_channels == 2) {
+    ss << "_rstereo";
+  } else {
+    RTC_DCHECK_NOTREACHED();
+  }
+  ss << reverse_output_rate / 1000;
+  ss << "_d" << file_direction << "_pcm";
+
+  std::string filename = ss.str();
+  if (temp_filenames[filename].empty())
+    temp_filenames[filename] = test::TempFilename(test::OutputPath(), filename);
+  return temp_filenames[filename];
+}
+
+void ClearTempFiles() {
+  for (auto& kv : temp_filenames)
+    remove(kv.second.c_str());
+}
+
+// Only remove "out" files. Keep "ref" files.
+void ClearTempOutFiles() {
+  for (auto it = temp_filenames.begin(); it != temp_filenames.end();) {
+    const std::string& filename = it->first;
+    if (filename.substr(0, 3).compare("out") == 0) {
+      remove(it->second.c_str());
+      temp_filenames.erase(it++);
+    } else {
+      it++;
+    }
+  }
+}
+
+void OpenFileAndReadMessage(absl::string_view filename, MessageLite* msg) {
+  FILE* file = fopen(std::string(filename).c_str(), "rb");
+  ASSERT_TRUE(file != NULL);
+  ReadMessageFromFile(file, msg);
+  fclose(file);
+}
+
+// Reads a 10 ms chunk (actually AudioProcessing::GetFrameSize() samples per
+// channel) of int16 interleaved audio from the given (assumed stereo) file,
+// converts to deinterleaved float (optionally downmixing) and returns the
+// result in `cb`. Returns false if the file ended (or on error) and true
+// otherwise.
+//
+// `int_data` and `float_data` are just temporary space that must be
+// sufficiently large to hold the 10 ms chunk.
+bool ReadChunk(FILE* file,
+               int16_t* int_data,
+               float* float_data,
+               ChannelBuffer<float>* cb) {
+  // The files always contain stereo audio.
+  size_t frame_size = cb->num_frames() * 2;
+  size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
+  if (read_count != frame_size) {
+    // Check that the file really ended.
+    RTC_DCHECK(feof(file));
+    return false;  // This is expected.
+  }
+
+  S16ToFloat(int_data, frame_size, float_data);
+  if (cb->num_channels() == 1) {
+    MixStereoToMono(float_data, cb->channels()[0], cb->num_frames());
+  } else {
+    Deinterleave(float_data, cb->num_frames(), 2, cb->channels());
+  }
+
+  return true;
+}
+
+// Returns the reference file name that matches the current CPU
+// architecture/optimizations.
+std::string GetReferenceFilename() {
+#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+  return test::ResourcePath("audio_processing/output_data_fixed", "pb");
+#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+  if (GetCPUInfo(kAVX2) != 0) {
+    return test::ResourcePath("audio_processing/output_data_float_avx2", "pb");
+  }
+  return test::ResourcePath("audio_processing/output_data_float", "pb");
+#endif
+}
+
+// Flag that can temporarily be enabled for local debugging to inspect
+// `ApmTest.VerifyDebugDump(Int|Float)` failures. Do not upload code changes
+// with this flag set to true.
+constexpr bool kDumpWhenExpectMessageEqFails = false;
+
+// Checks the debug constant values used in this file so that no code change
+// is submitted with values temporarily used for local debugging.
+TEST(ApmUnitTests, CheckDebugConstants) {
+  ASSERT_FALSE(kDumpWhenExpectMessageEqFails);
+}
+
+// Expects the equality of `actual` and `expected` by inspecting a hard-coded
+// subset of `audioproc::Stream` fields.
+void ExpectStreamFieldsEq(const audioproc::Stream& actual,
+                          const audioproc::Stream& expected) {
+  EXPECT_EQ(actual.input_data(), expected.input_data());
+  EXPECT_EQ(actual.output_data(), expected.output_data());
+  EXPECT_EQ(actual.delay(), expected.delay());
+  EXPECT_EQ(actual.drift(), expected.drift());
+  EXPECT_EQ(actual.applied_input_volume(), expected.applied_input_volume());
+  EXPECT_EQ(actual.keypress(), expected.keypress());
+}
+
+// Expects the equality of `actual` and `expected` by inspecting a hard-coded
+// subset of `audioproc::Event` fields.
+void ExpectEventFieldsEq(const audioproc::Event& actual,
+                         const audioproc::Event& expected) {
+  EXPECT_EQ(actual.type(), expected.type());
+  if (actual.type() != expected.type()) {
+    return;
+  }
+  switch (actual.type()) {
+    case audioproc::Event::STREAM:
+      ExpectStreamFieldsEq(actual.stream(), expected.stream());
+      break;
+    default:
+      // Not implemented.
+      break;
+  }
+}
+
+// Returns true if the `actual` and `expected` byte streams share the same
+// size and contain the same data. If they differ and
+// `kDumpWhenExpectMessageEqFails` is true, checks the equality of a subset of
+// `audioproc::Event` (nested) fields.
+bool ExpectMessageEq(rtc::ArrayView<const uint8_t> actual,
+                     rtc::ArrayView<const uint8_t> expected) {
+  EXPECT_EQ(actual.size(), expected.size());
+  if (actual.size() != expected.size()) {
+    return false;
+  }
+  if (memcmp(actual.data(), expected.data(), actual.size()) == 0) {
+    // Same message. No need to parse.
+    return true;
+  }
+  if (kDumpWhenExpectMessageEqFails) {
+    // Parse differing messages and expect equality to produce detailed error
+    // messages.
+    audioproc::Event event_actual, event_expected;
+    RTC_DCHECK(event_actual.ParseFromArray(actual.data(), actual.size()));
+    RTC_DCHECK(event_expected.ParseFromArray(expected.data(), expected.size()));
+    ExpectEventFieldsEq(event_actual, event_expected);
+  }
+  return false;
+}
+
+class ApmTest : public ::testing::Test {
+ protected:
+  ApmTest();
+  virtual void SetUp();
+  virtual void TearDown();
+
+  static void SetUpTestSuite() {}
+
+  static void TearDownTestSuite() { ClearTempFiles(); }
+
+  // Used to select between int and float interface tests.
+  enum Format { kIntFormat, kFloatFormat };
+
+  void Init(int sample_rate_hz,
+            int output_sample_rate_hz,
+            int reverse_sample_rate_hz,
+            size_t num_input_channels,
+            size_t num_output_channels,
+            size_t num_reverse_channels,
+            bool open_output_file);
+  void Init(AudioProcessing* ap);
+  void EnableAllComponents();
+  bool ReadFrame(FILE* file, Int16FrameData* frame);
+  bool ReadFrame(FILE* file, Int16FrameData* frame, ChannelBuffer<float>* cb);
+  void ReadFrameWithRewind(FILE* file, Int16FrameData* frame);
+  void ReadFrameWithRewind(FILE* file,
+                           Int16FrameData* frame,
+                           ChannelBuffer<float>* cb);
+  void ProcessDelayVerificationTest(int delay_ms,
+                                    int system_delay_ms,
+                                    int delay_min,
+                                    int delay_max);
+  void TestChangingChannelsInt16Interface(
+      size_t num_channels,
+      AudioProcessing::Error expected_return);
+  void TestChangingForwardChannels(size_t num_in_channels,
+                                   size_t num_out_channels,
+                                   AudioProcessing::Error expected_return);
+  void TestChangingReverseChannels(size_t num_rev_channels,
+                                   AudioProcessing::Error expected_return);
+  void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate);
+  void RunManualVolumeChangeIsPossibleTest(int sample_rate);
+  void StreamParametersTest(Format format);
+  int ProcessStreamChooser(Format format);
+  int AnalyzeReverseStreamChooser(Format format);
+  void ProcessDebugDump(absl::string_view in_filename,
+                        absl::string_view out_filename,
+                        Format format,
+                        int max_size_bytes);
+  void VerifyDebugDumpTest(Format format);
+
+  const std::string output_path_;
+  const std::string ref_filename_;
+  rtc::scoped_refptr<AudioProcessing> apm_;
+  Int16FrameData frame_;
+  Int16FrameData revframe_;
+  std::unique_ptr<ChannelBuffer<float>> float_cb_;
+  std::unique_ptr<ChannelBuffer<float>> revfloat_cb_;
+  int output_sample_rate_hz_;
+  size_t num_output_channels_;
+  FILE* far_file_;
+  FILE* near_file_;
+  FILE* out_file_;
+};
+
+ApmTest::ApmTest()
+    : output_path_(test::OutputPath()),
+      ref_filename_(GetReferenceFilename()),
+      output_sample_rate_hz_(0),
+      num_output_channels_(0),
+      far_file_(NULL),
+      near_file_(NULL),
+      out_file_(NULL) {
+  apm_ = AudioProcessingBuilderForTesting().Create();
+  AudioProcessing::Config apm_config = apm_->GetConfig();
+  apm_config.gain_controller1.analog_gain_controller.enabled = false;
+  apm_config.pipeline.maximum_internal_processing_rate = 48000;
+  apm_->ApplyConfig(apm_config);
+}
+
+void ApmTest::SetUp() {
+  ASSERT_TRUE(apm_.get() != NULL);
+
+  Init(32000, 32000, 32000, 2, 2, 2, false);
+}
+
+void ApmTest::TearDown() {
+  if (far_file_) {
+    ASSERT_EQ(0, fclose(far_file_));
+  }
+  far_file_ = NULL;
+
+  if (near_file_) {
+    ASSERT_EQ(0, fclose(near_file_));
+  }
+  near_file_ = NULL;
+
+  if (out_file_) {
+    ASSERT_EQ(0, fclose(out_file_));
+  }
+  out_file_ = NULL;
+}
+
+void ApmTest::Init(AudioProcessing* ap) {
+  ASSERT_EQ(
+      kNoErr,
+      ap->Initialize({{{frame_.sample_rate_hz, frame_.num_channels},
+                       {output_sample_rate_hz_, num_output_channels_},
+                       {revframe_.sample_rate_hz, revframe_.num_channels},
+                       {revframe_.sample_rate_hz, revframe_.num_channels}}}));
+}
+
+void ApmTest::Init(int sample_rate_hz,
+                   int output_sample_rate_hz,
+                   int reverse_sample_rate_hz,
+                   size_t num_input_channels,
+                   size_t num_output_channels,
+                   size_t num_reverse_channels,
+                   bool open_output_file) {
+  SetContainerFormat(sample_rate_hz, num_input_channels, &frame_, &float_cb_);
+  output_sample_rate_hz_ = output_sample_rate_hz;
+  num_output_channels_ = num_output_channels;
+
+  SetContainerFormat(reverse_sample_rate_hz, num_reverse_channels, &revframe_,
+                     &revfloat_cb_);
+  Init(apm_.get());
+
+  if (far_file_) {
+    ASSERT_EQ(0, fclose(far_file_));
+  }
+  std::string filename = ResourceFilePath("far", sample_rate_hz);
+  far_file_ = fopen(filename.c_str(), "rb");
+  ASSERT_TRUE(far_file_ != NULL) << "Could not open file " << filename << "\n";
+
+  if (near_file_) {
+    ASSERT_EQ(0, fclose(near_file_));
+  }
+  filename = ResourceFilePath("near", sample_rate_hz);
+  near_file_ = fopen(filename.c_str(), "rb");
+  ASSERT_TRUE(near_file_ != NULL) << "Could not open file " << filename << "\n";
+
+  if (open_output_file) {
+    if (out_file_) {
+      ASSERT_EQ(0, fclose(out_file_));
+    }
+    filename = OutputFilePath(
+        "out", sample_rate_hz, output_sample_rate_hz, reverse_sample_rate_hz,
+        reverse_sample_rate_hz, num_input_channels, num_output_channels,
+        num_reverse_channels, num_reverse_channels, kForward);
+    out_file_ = fopen(filename.c_str(), "wb");
+    ASSERT_TRUE(out_file_ != NULL)
+        << "Could not open file " << filename << "\n";
+  }
+}
+
+void ApmTest::EnableAllComponents() {
+  EnableAllAPComponents(apm_.get());
+}
+
+bool ApmTest::ReadFrame(FILE* file,
+                        Int16FrameData* frame,
+                        ChannelBuffer<float>* cb) {
+  // The files always contain stereo audio.
+  size_t frame_size = frame->samples_per_channel * 2;
+  size_t read_count =
+      fread(frame->data.data(), sizeof(int16_t), frame_size, file);
+  if (read_count != frame_size) {
+    // Check that the file really ended.
+    EXPECT_NE(0, feof(file));
+    return false;  // This is expected.
+  }
+
+  if (frame->num_channels == 1) {
+    MixStereoToMono(frame->data.data(), frame->data.data(),
+                    frame->samples_per_channel);
+  }
+
+  if (cb) {
+    ConvertToFloat(*frame, cb);
+  }
+  return true;
+}
+
+bool ApmTest::ReadFrame(FILE* file, Int16FrameData* frame) {
+  return ReadFrame(file, frame, NULL);
+}
+
+// If the end of the file has been reached, rewind it and attempt to read the
+// frame again.
+void ApmTest::ReadFrameWithRewind(FILE* file,
+                                  Int16FrameData* frame,
+                                  ChannelBuffer<float>* cb) {
+  if (!ReadFrame(file, frame, cb)) {
+    rewind(file);
+    ASSERT_TRUE(ReadFrame(file, frame, cb));
+  }
+}
+
+void ApmTest::ReadFrameWithRewind(FILE* file, Int16FrameData* frame) {
+  ReadFrameWithRewind(file, frame, NULL);
+}
+
+int ApmTest::ProcessStreamChooser(Format format) {
+  if (format == kIntFormat) {
+    return apm_->ProcessStream(
+        frame_.data.data(),
+        StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+        StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+        frame_.data.data());
+  }
+  return apm_->ProcessStream(
+      float_cb_->channels(),
+      StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+      StreamConfig(output_sample_rate_hz_, num_output_channels_),
+      float_cb_->channels());
+}
+
+int ApmTest::AnalyzeReverseStreamChooser(Format format) {
+  if (format == kIntFormat) {
+    return apm_->ProcessReverseStream(
+        revframe_.data.data(),
+        StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+        StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+        revframe_.data.data());
+  }
+  return apm_->AnalyzeReverseStream(
+      revfloat_cb_->channels(),
+      StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels));
+}
+
+void ApmTest::ProcessDelayVerificationTest(int delay_ms,
+                                           int system_delay_ms,
+                                           int delay_min,
+                                           int delay_max) {
+  // The `revframe_` and `frame_` should include the proper frame information,
+  // hence can be used for extracting information.
+  Int16FrameData tmp_frame;
+  std::queue<Int16FrameData*> frame_queue;
+  bool causal = true;
+
+  tmp_frame.CopyFrom(revframe_);
+  SetFrameTo(&tmp_frame, 0);
+
+  EXPECT_EQ(apm_->kNoError, apm_->Initialize());
+  // Initialize the `frame_queue` with empty frames.
+  int frame_delay = delay_ms / 10;
+  while (frame_delay < 0) {
+    Int16FrameData* frame = new Int16FrameData();
+    frame->CopyFrom(tmp_frame);
+    frame_queue.push(frame);
+    frame_delay++;
+    causal = false;
+  }
+  while (frame_delay > 0) {
+    Int16FrameData* frame = new Int16FrameData();
+    frame->CopyFrom(tmp_frame);
+    frame_queue.push(frame);
+    frame_delay--;
+  }
+  // Run for 4.5 seconds, skipping statistics from the first 2.5 seconds. We
+  // need enough frames with audio to have reliable estimates, but as few as
+  // possible to keep processing time down. 4.5 seconds seemed to be a good
+  // compromise for this recording.
+  for (int frame_count = 0; frame_count < 450; ++frame_count) {
+    Int16FrameData* frame = new Int16FrameData();
+    frame->CopyFrom(tmp_frame);
+    // Use the near end recording, since that has more speech in it.
+    ASSERT_TRUE(ReadFrame(near_file_, frame));
+    frame_queue.push(frame);
+    Int16FrameData* reverse_frame = frame;
+    Int16FrameData* process_frame = frame_queue.front();
+    if (!causal) {
+      reverse_frame = frame_queue.front();
+      // When we call ProcessStream() the frame is modified, so we can't use
+      // the pointer directly when things are non-causal. Use an intermediate
+      // frame and copy the data.
+      process_frame = &tmp_frame;
+      process_frame->CopyFrom(*frame);
+    }
+    EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(
+                                  reverse_frame->data.data(),
+                                  StreamConfig(reverse_frame->sample_rate_hz,
+                                               reverse_frame->num_channels),
+                                  StreamConfig(reverse_frame->sample_rate_hz,
+                                               reverse_frame->num_channels),
+                                  reverse_frame->data.data()));
+    EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(system_delay_ms));
+    EXPECT_EQ(apm_->kNoError,
+              apm_->ProcessStream(process_frame->data.data(),
+                                  StreamConfig(process_frame->sample_rate_hz,
+                                               process_frame->num_channels),
+                                  StreamConfig(process_frame->sample_rate_hz,
+                                               process_frame->num_channels),
+                                  process_frame->data.data()));
+    frame = frame_queue.front();
+    frame_queue.pop();
+    delete frame;
+
+    if (frame_count == 250) {
+      // Discard the first delay metrics to avoid convergence effects.
+      static_cast<void>(apm_->GetStatistics());
+    }
+  }
+
+  rewind(near_file_);
+  while (!frame_queue.empty()) {
+    Int16FrameData* frame = frame_queue.front();
+    frame_queue.pop();
+    delete frame;
+  }
+  // Calculate expected delay estimate and acceptable regions. Further,
+  // limit them w.r.t. AEC delay estimation support.
+  const size_t samples_per_ms =
+      rtc::SafeMin(16u, frame_.samples_per_channel / 10);
+  const int expected_median =
+      rtc::SafeClamp(delay_ms - system_delay_ms, delay_min, delay_max);
+  const int expected_median_high = rtc::SafeClamp(
+      expected_median + rtc::dchecked_cast<int>(96 / samples_per_ms), delay_min,
+      delay_max);
+  const int expected_median_low = rtc::SafeClamp(
+      expected_median - rtc::dchecked_cast<int>(96 / samples_per_ms), delay_min,
+      delay_max);
+  // Verify delay metrics.
+  AudioProcessingStats stats = apm_->GetStatistics();
+  ASSERT_TRUE(stats.delay_median_ms.has_value());
+  int32_t median = *stats.delay_median_ms;
+  EXPECT_GE(expected_median_high, median);
+  EXPECT_LE(expected_median_low, median);
+}
+
+void ApmTest::StreamParametersTest(Format format) {
+  // No errors when the components are disabled.
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
+
+  // -- Missing AGC level --
+  AudioProcessing::Config apm_config = apm_->GetConfig();
+  apm_config.gain_controller1.enabled = true;
+  apm_->ApplyConfig(apm_config);
+  EXPECT_EQ(apm_->kStreamParameterNotSetError, ProcessStreamChooser(format));
+
+  // Resets after successful ProcessStream().
+  apm_->set_stream_analog_level(127);
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError, ProcessStreamChooser(format));
+
+  // Other stream parameters set correctly.
+  apm_config.echo_canceller.enabled = true;
+  apm_config.echo_canceller.mobile_mode = false;
+  apm_->ApplyConfig(apm_config);
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError, ProcessStreamChooser(format));
+  apm_config.gain_controller1.enabled = false;
+  apm_->ApplyConfig(apm_config);
+
+  // -- Missing delay --
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
+
+  // Resets after successful ProcessStream().
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
+
+  // Other stream parameters set correctly.
+  apm_config.gain_controller1.enabled = true;
+  apm_->ApplyConfig(apm_config);
+  apm_->set_stream_analog_level(127);
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
+  apm_config.gain_controller1.enabled = false;
+  apm_->ApplyConfig(apm_config);
+
+  // -- No stream parameters --
+  EXPECT_EQ(apm_->kNoError, AnalyzeReverseStreamChooser(format));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
+
+  // -- All there --
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
+  apm_->set_stream_analog_level(127);
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
+}
+
+TEST_F(ApmTest, StreamParametersInt) {
+  StreamParametersTest(kIntFormat);
+}
+
+TEST_F(ApmTest, StreamParametersFloat) {
+  StreamParametersTest(kFloatFormat);
+}
+
+void ApmTest::TestChangingChannelsInt16Interface(
+    size_t num_channels,
+    AudioProcessing::Error expected_return) {
+  frame_.num_channels = num_channels;
+
+  EXPECT_EQ(expected_return,
+            apm_->ProcessStream(
+                frame_.data.data(),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                frame_.data.data()));
+  EXPECT_EQ(expected_return,
+            apm_->ProcessReverseStream(
+                frame_.data.data(),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                frame_.data.data()));
+}
+
+void ApmTest::TestChangingForwardChannels(
+    size_t num_in_channels,
+    size_t num_out_channels,
+    AudioProcessing::Error expected_return) {
+  const StreamConfig input_stream = {frame_.sample_rate_hz, num_in_channels};
+  const StreamConfig output_stream = {output_sample_rate_hz_,
+                                      num_out_channels};
+
+  EXPECT_EQ(expected_return,
+            apm_->ProcessStream(float_cb_->channels(), input_stream,
+                                output_stream, float_cb_->channels()));
+}
+
+void ApmTest::TestChangingReverseChannels(
+    size_t num_rev_channels,
+    AudioProcessing::Error expected_return) {
+  const ProcessingConfig processing_config = {
+      {{frame_.sample_rate_hz, apm_->num_input_channels()},
+       {output_sample_rate_hz_, apm_->num_output_channels()},
+       {frame_.sample_rate_hz, num_rev_channels},
+       {frame_.sample_rate_hz, num_rev_channels}}};
+
+  EXPECT_EQ(
+      expected_return,
+      apm_->ProcessReverseStream(
+          float_cb_->channels(), processing_config.reverse_input_stream(),
+          processing_config.reverse_output_stream(), float_cb_->channels()));
+}
+
+TEST_F(ApmTest, ChannelsInt16Interface) {
+  // Testing number of invalid and valid channels.
+  Init(16000, 16000, 16000, 4, 4, 4, false);
+
+  TestChangingChannelsInt16Interface(0, apm_->kBadNumberChannelsError);
+
+  for (size_t i = 1; i < 4; i++) {
+    TestChangingChannelsInt16Interface(i, kNoErr);
+    EXPECT_EQ(i, apm_->num_input_channels());
+  }
+}
+
+TEST_F(ApmTest, Channels) {
+  // Testing number of invalid and valid channels.
+  Init(16000, 16000, 16000, 4, 4, 4, false);
+
+  TestChangingForwardChannels(0, 1, apm_->kBadNumberChannelsError);
+  TestChangingReverseChannels(0, apm_->kBadNumberChannelsError);
+
+  for (size_t i = 1; i < 4; ++i) {
+    for (size_t j = 0; j < 1; ++j) {
+      // Output channels must be one or match input channels.
+      if (j == 1 || i == j) {
+        TestChangingForwardChannels(i, j, kNoErr);
+        TestChangingReverseChannels(i, kNoErr);
+
+        EXPECT_EQ(i, apm_->num_input_channels());
+        EXPECT_EQ(j, apm_->num_output_channels());
+        // The number of reverse channels used for processing is always 1.
+        EXPECT_EQ(1u, apm_->num_reverse_channels());
+      } else {
+        TestChangingForwardChannels(i, j,
+                                    AudioProcessing::kBadNumberChannelsError);
+      }
+    }
+  }
+}
+
+TEST_F(ApmTest, SampleRatesInt) {
+  // Testing some valid sample rates.
+  for (int sample_rate : {8000, 12000, 16000, 32000, 44100, 48000, 96000}) {
+    SetContainerFormat(sample_rate, 2, &frame_, &float_cb_);
+    EXPECT_NOERR(ProcessStreamChooser(kIntFormat));
+  }
+}
+
+// This test repeatedly reconfigures the pre-amplifier in APM, processes a
+// number of frames, and checks that output signal has the right level.
+TEST_F(ApmTest, PreAmplifier) {
+  // Fill the audio frame with a sawtooth pattern.
+  rtc::ArrayView<int16_t> frame_data = GetMutableFrameData(&frame_);
+  const size_t samples_per_channel = frame_.samples_per_channel;
+  for (size_t i = 0; i < samples_per_channel; i++) {
+    for (size_t ch = 0; ch < frame_.num_channels; ++ch) {
+      frame_data[i + ch * samples_per_channel] = 10000 * ((i % 3) - 1);
+    }
+  }
+  // Cache the frame in tmp_frame.
+  Int16FrameData tmp_frame;
+  tmp_frame.CopyFrom(frame_);
+
+  auto compute_power = [](const Int16FrameData& frame) {
+    rtc::ArrayView<const int16_t> data = GetFrameData(frame);
+    return std::accumulate(data.begin(), data.end(), 0.0f,
+                           [](float a, float b) { return a + b * b; }) /
+           data.size() / 32768 / 32768;
+  };
+
+  const float input_power = compute_power(tmp_frame);
+  // Double-check that the input data is large compared to the error kEpsilon.
+  constexpr float kEpsilon = 1e-4f;
+  RTC_DCHECK_GE(input_power, 10 * kEpsilon);
+
+  // 1. Enable pre-amp with 0 dB gain.
+  AudioProcessing::Config config = apm_->GetConfig();
+  config.pre_amplifier.enabled = true;
+  config.pre_amplifier.fixed_gain_factor = 1.0f;
+  apm_->ApplyConfig(config);
+
+  for (int i = 0; i < 20; ++i) {
+    frame_.CopyFrom(tmp_frame);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+  }
+  float output_power = compute_power(frame_);
+  EXPECT_NEAR(output_power, input_power, kEpsilon);
+  config = apm_->GetConfig();
+  EXPECT_EQ(config.pre_amplifier.fixed_gain_factor, 1.0f);
+
+  // 2. Change pre-amp gain via ApplyConfig.
+  config.pre_amplifier.fixed_gain_factor = 2.0f;
+  apm_->ApplyConfig(config);
+
+  for (int i = 0; i < 20; ++i) {
+    frame_.CopyFrom(tmp_frame);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+  }
+  output_power = compute_power(frame_);
+  EXPECT_NEAR(output_power, 4 * input_power, kEpsilon);
+  config = apm_->GetConfig();
+  EXPECT_EQ(config.pre_amplifier.fixed_gain_factor, 2.0f);
+
+  // 3. Change pre-amp gain via a RuntimeSetting.
+  apm_->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(1.5f));
+
+  for (int i = 0; i < 20; ++i) {
+    frame_.CopyFrom(tmp_frame);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+  }
+  output_power = compute_power(frame_);
+  EXPECT_NEAR(output_power, 2.25 * input_power, kEpsilon);
+  config = apm_->GetConfig();
+  EXPECT_EQ(config.pre_amplifier.fixed_gain_factor, 1.5f);
+}
+
+// Ensures that the emulated analog mic gain functionality runs without
+// crashing.
+TEST_F(ApmTest, AnalogMicGainEmulation) {
+  // Fill the audio frame with a sawtooth pattern.
+  rtc::ArrayView<int16_t> frame_data = GetMutableFrameData(&frame_);
+  const size_t samples_per_channel = frame_.samples_per_channel;
+  for (size_t i = 0; i < samples_per_channel; i++) {
+    for (size_t ch = 0; ch < frame_.num_channels; ++ch) {
+      frame_data[i + ch * samples_per_channel] = 100 * ((i % 3) - 1);
+    }
+  }
+  // Cache the frame in tmp_frame.
+  Int16FrameData tmp_frame;
+  tmp_frame.CopyFrom(frame_);
+
+  // Enable the analog gain emulation.
+  AudioProcessing::Config config = apm_->GetConfig();
+  config.capture_level_adjustment.enabled = true;
+  config.capture_level_adjustment.analog_mic_gain_emulation.enabled = true;
+  config.capture_level_adjustment.analog_mic_gain_emulation.initial_level = 21;
+  config.gain_controller1.enabled = true;
+  config.gain_controller1.mode =
+      AudioProcessing::Config::GainController1::Mode::kAdaptiveAnalog;
+  config.gain_controller1.analog_gain_controller.enabled = true;
+  apm_->ApplyConfig(config);
+
+  // Process a number of frames to ensure that the code runs without crashes.
+  for (int i = 0; i < 20; ++i) {
+    frame_.CopyFrom(tmp_frame);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+  }
+}
+
+// This test repeatedly reconfigures the capture level adjustment functionality
+// in APM, processes a number of frames, and checks that output signal has the
+// right level.
+TEST_F(ApmTest, CaptureLevelAdjustment) {
+  // Fill the audio frame with a sawtooth pattern.
+  rtc::ArrayView<int16_t> frame_data = GetMutableFrameData(&frame_);
+  const size_t samples_per_channel = frame_.samples_per_channel;
+  for (size_t i = 0; i < samples_per_channel; i++) {
+    for (size_t ch = 0; ch < frame_.num_channels; ++ch) {
+      frame_data[i + ch * samples_per_channel] = 100 * ((i % 3) - 1);
+    }
+  }
+  // Cache the frame in tmp_frame.
+  Int16FrameData tmp_frame;
+  tmp_frame.CopyFrom(frame_);
+
+  auto compute_power = [](const Int16FrameData& frame) {
+    rtc::ArrayView<const int16_t> data = GetFrameData(frame);
+    return std::accumulate(data.begin(), data.end(), 0.0f,
+                           [](float a, float b) { return a + b * b; }) /
+           data.size() / 32768 / 32768;
+  };
+
+  const float input_power = compute_power(tmp_frame);
+  // Double-check that the input data is large compared to the error kEpsilon.
+  constexpr float kEpsilon = 1e-20f;
+  RTC_DCHECK_GE(input_power, 10 * kEpsilon);
+
+  // 1. Enable capture level adjustment with pre-gain 0.5 and post-gain 4.
+  AudioProcessing::Config config = apm_->GetConfig();
+  config.capture_level_adjustment.enabled = true;
+  config.capture_level_adjustment.pre_gain_factor = 0.5f;
+  config.capture_level_adjustment.post_gain_factor = 4.f;
+  const float expected_output_power1 =
+      config.capture_level_adjustment.pre_gain_factor *
+      config.capture_level_adjustment.pre_gain_factor *
+      config.capture_level_adjustment.post_gain_factor *
+      config.capture_level_adjustment.post_gain_factor * input_power;
+  apm_->ApplyConfig(config);
+
+  for (int i = 0; i < 20; ++i) {
+    frame_.CopyFrom(tmp_frame);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+  }
+  float output_power = compute_power(frame_);
+  EXPECT_NEAR(output_power, expected_output_power1, kEpsilon);
+  config = apm_->GetConfig();
+  EXPECT_EQ(config.capture_level_adjustment.pre_gain_factor, 0.5f);
+  EXPECT_EQ(config.capture_level_adjustment.post_gain_factor, 4.f);
+
+  // 2. Change the pre and post gains via ApplyConfig.
+  config.capture_level_adjustment.pre_gain_factor = 1.0f;
+  config.capture_level_adjustment.post_gain_factor = 2.f;
+  const float expected_output_power2 =
+      config.capture_level_adjustment.pre_gain_factor *
+      config.capture_level_adjustment.pre_gain_factor *
+      config.capture_level_adjustment.post_gain_factor *
+      config.capture_level_adjustment.post_gain_factor * input_power;
+  apm_->ApplyConfig(config);
+
+  for (int i = 0; i < 20; ++i) {
+    frame_.CopyFrom(tmp_frame);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+  }
+  output_power = compute_power(frame_);
+  EXPECT_NEAR(output_power, expected_output_power2, kEpsilon);
+  config = apm_->GetConfig();
+  EXPECT_EQ(config.capture_level_adjustment.pre_gain_factor, 1.0f);
+  EXPECT_EQ(config.capture_level_adjustment.post_gain_factor, 2.f);
+
+  // 3. Change the pre and post gains via RuntimeSettings.
+  constexpr float kPreGain3 = 0.5f;
+  constexpr float kPostGain3 = 3.f;
+  const float expected_output_power3 =
+      kPreGain3 * kPreGain3 * kPostGain3 * kPostGain3 * input_power;
+
+  apm_->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePreGain(kPreGain3));
+  apm_->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCapturePostGain(kPostGain3));
+
+  for (int i = 0; i < 20; ++i) {
+    frame_.CopyFrom(tmp_frame);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kIntFormat));
+  }
+  output_power = compute_power(frame_);
+  EXPECT_NEAR(output_power, expected_output_power3, kEpsilon);
+  config = apm_->GetConfig();
+  EXPECT_EQ(config.capture_level_adjustment.pre_gain_factor, 0.5f);
+  EXPECT_EQ(config.capture_level_adjustment.post_gain_factor, 3.f);
+}
+
+TEST_F(ApmTest, GainControl) {
+  AudioProcessing::Config config = apm_->GetConfig();
+  config.gain_controller1.enabled = false;
+  apm_->ApplyConfig(config);
+  config.gain_controller1.enabled = true;
+  apm_->ApplyConfig(config);
+
+  // Testing gain modes.
+  for (auto mode :
+       {AudioProcessing::Config::GainController1::kAdaptiveDigital,
+        AudioProcessing::Config::GainController1::kFixedDigital,
+        AudioProcessing::Config::GainController1::kAdaptiveAnalog}) {
+    config.gain_controller1.mode = mode;
+    apm_->ApplyConfig(config);
+    apm_->set_stream_analog_level(100);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat));
+  }
+
+  // Testing target levels.
+  for (int target_level_dbfs : {0, 15, 31}) {
+    config.gain_controller1.target_level_dbfs = target_level_dbfs;
+    apm_->ApplyConfig(config);
+    apm_->set_stream_analog_level(100);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat));
+  }
+
+  // Testing compression gains.
+  for (int compression_gain_db : {0, 10, 90}) {
+    config.gain_controller1.compression_gain_db = compression_gain_db;
+    apm_->ApplyConfig(config);
+    apm_->set_stream_analog_level(100);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat));
+  }
+
+  // Testing limiter off/on.
+  for (bool enable : {false, true}) {
+    config.gain_controller1.enable_limiter = enable;
+    apm_->ApplyConfig(config);
+    apm_->set_stream_analog_level(100);
+    EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat));
+  }
+
+  // Testing level limits.
+ constexpr int kMinLevel = 0; + constexpr int kMaxLevel = 255; + apm_->set_stream_analog_level(kMinLevel); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat)); + apm_->set_stream_analog_level((kMinLevel + kMaxLevel) / 2); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat)); + apm_->set_stream_analog_level(kMaxLevel); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(kFloatFormat)); +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +using ApmDeathTest = ApmTest; + +TEST_F(ApmDeathTest, GainControlDiesOnTooLowTargetLevelDbfs) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.target_level_dbfs = -1; + EXPECT_DEATH(apm_->ApplyConfig(config), ""); +} + +TEST_F(ApmDeathTest, GainControlDiesOnTooHighTargetLevelDbfs) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.target_level_dbfs = 32; + EXPECT_DEATH(apm_->ApplyConfig(config), ""); +} + +TEST_F(ApmDeathTest, GainControlDiesOnTooLowCompressionGainDb) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.compression_gain_db = -1; + EXPECT_DEATH(apm_->ApplyConfig(config), ""); +} + +TEST_F(ApmDeathTest, GainControlDiesOnTooHighCompressionGainDb) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.compression_gain_db = 91; + EXPECT_DEATH(apm_->ApplyConfig(config), ""); +} + +TEST_F(ApmDeathTest, ApmDiesOnTooLowAnalogLevel) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + apm_->ApplyConfig(config); + EXPECT_DEATH(apm_->set_stream_analog_level(-1), ""); +} + +TEST_F(ApmDeathTest, ApmDiesOnTooHighAnalogLevel) { + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + apm_->ApplyConfig(config); + EXPECT_DEATH(apm_->set_stream_analog_level(256), ""); +} +#endif + +void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) { + Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.mode = + AudioProcessing::Config::GainController1::kAdaptiveAnalog; + apm_->ApplyConfig(config); + + int out_analog_level = 0; + for (int i = 0; i < 2000; ++i) { + ReadFrameWithRewind(near_file_, &frame_); + // Ensure the audio is at a low level, so the AGC will try to increase it. + ScaleFrame(&frame_, 0.25); + + // Always pass in the same volume. + apm_->set_stream_analog_level(100); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + out_analog_level = apm_->recommended_stream_analog_level(); + } + + // Ensure the AGC is still able to reach the maximum. + EXPECT_EQ(255, out_analog_level); +} + +// Verifies that despite volume slider quantization, the AGC can continue to +// increase its volume. 
+TEST_F(ApmTest, QuantizedVolumeDoesNotGetStuck) { + for (size_t sample_rate_hz : kProcessSampleRates) { + SCOPED_TRACE(::testing::Message() << "sample_rate_hz=" << sample_rate_hz); + RunQuantizedVolumeDoesNotGetStuckTest(sample_rate_hz); + } +} + +void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) { + Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); + auto config = apm_->GetConfig(); + config.gain_controller1.enabled = true; + config.gain_controller1.mode = + AudioProcessing::Config::GainController1::kAdaptiveAnalog; + apm_->ApplyConfig(config); + + int out_analog_level = 100; + for (int i = 0; i < 1000; ++i) { + ReadFrameWithRewind(near_file_, &frame_); + // Ensure the audio is at a low level, so the AGC will try to increase it. + ScaleFrame(&frame_, 0.25); + + apm_->set_stream_analog_level(out_analog_level); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + out_analog_level = apm_->recommended_stream_analog_level(); + } + + // Ensure the volume was raised. + EXPECT_GT(out_analog_level, 100); + int highest_level_reached = out_analog_level; + // Simulate a user manual volume change. + out_analog_level = 100; + + for (int i = 0; i < 300; ++i) { + ReadFrameWithRewind(near_file_, &frame_); + ScaleFrame(&frame_, 0.25); + + apm_->set_stream_analog_level(out_analog_level); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + out_analog_level = apm_->recommended_stream_analog_level(); + // Check that AGC respected the manually adjusted volume. + EXPECT_LT(out_analog_level, highest_level_reached); + } + // Check that the volume was still raised. + EXPECT_GT(out_analog_level, 100); +} + +TEST_F(ApmTest, ManualVolumeChangeIsPossible) { + for (size_t sample_rate_hz : kProcessSampleRates) { + SCOPED_TRACE(::testing::Message() << "sample_rate_hz=" << sample_rate_hz); + RunManualVolumeChangeIsPossibleTest(sample_rate_hz); + } +} + +TEST_F(ApmTest, HighPassFilter) { + // Turn HP filter on/off + AudioProcessing::Config apm_config; + apm_config.high_pass_filter.enabled = true; + apm_->ApplyConfig(apm_config); + apm_config.high_pass_filter.enabled = false; + apm_->ApplyConfig(apm_config); +} + +TEST_F(ApmTest, AllProcessingDisabledByDefault) { + AudioProcessing::Config config = apm_->GetConfig(); + EXPECT_FALSE(config.echo_canceller.enabled); + EXPECT_FALSE(config.high_pass_filter.enabled); + EXPECT_FALSE(config.gain_controller1.enabled); + EXPECT_FALSE(config.noise_suppression.enabled); +} + +TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledInt) { + // Test that ProcessStream simply copies input to output when all components + // are disabled. + // Runs over all processing rates, and some particularly common or special + // rates. + // - 8000 Hz: lowest sample rate seen in Chrome metrics, + // - 22050 Hz: APM input/output frames are not exactly 10 ms, + // - 44100 Hz: very common desktop sample rate. 
+ constexpr int kSampleRatesHz[] = {8000, 16000, 22050, 32000, 44100, 48000}; + for (size_t sample_rate_hz : kSampleRatesHz) { + SCOPED_TRACE(::testing::Message() << "sample_rate_hz=" << sample_rate_hz); + Init(sample_rate_hz, sample_rate_hz, sample_rate_hz, 2, 2, 2, false); + SetFrameTo(&frame_, 1000, 2000); + Int16FrameData frame_copy; + frame_copy.CopyFrom(frame_); + for (int j = 0; j < 1000; j++) { + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy)); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessReverseStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy)); + } + } +} + +TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledFloat) { + // Test that ProcessStream simply copies input to output when all components + // are disabled. + const size_t kSamples = 160; + const int sample_rate = 16000; + const float src[kSamples] = {-1.0f, 0.0f, 1.0f}; + float dest[kSamples] = {}; + + auto src_channels = &src[0]; + auto dest_channels = &dest[0]; + + apm_ = AudioProcessingBuilderForTesting().Create(); + EXPECT_NOERR(apm_->ProcessStream(&src_channels, StreamConfig(sample_rate, 1), + StreamConfig(sample_rate, 1), + &dest_channels)); + + for (size_t i = 0; i < kSamples; ++i) { + EXPECT_EQ(src[i], dest[i]); + } + + // Same for ProcessReverseStream. + float rev_dest[kSamples] = {}; + auto rev_dest_channels = &rev_dest[0]; + + StreamConfig input_stream = {sample_rate, 1}; + StreamConfig output_stream = {sample_rate, 1}; + EXPECT_NOERR(apm_->ProcessReverseStream(&src_channels, input_stream, + output_stream, &rev_dest_channels)); + + for (size_t i = 0; i < kSamples; ++i) { + EXPECT_EQ(src[i], rev_dest[i]); + } +} + +TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { + EnableAllComponents(); + + for (size_t i = 0; i < arraysize(kProcessSampleRates); i++) { + Init(kProcessSampleRates[i], kProcessSampleRates[i], kProcessSampleRates[i], + 2, 2, 2, false); + int analog_level = 127; + ASSERT_EQ(0, feof(far_file_)); + ASSERT_EQ(0, feof(near_file_)); + while (ReadFrame(far_file_, &revframe_) && ReadFrame(near_file_, &frame_)) { + CopyLeftToRightChannel(revframe_.data.data(), + revframe_.samples_per_channel); + + ASSERT_EQ( + kNoErr, + apm_->ProcessReverseStream( + revframe_.data.data(), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + revframe_.data.data())); + + CopyLeftToRightChannel(frame_.data.data(), frame_.samples_per_channel); + + ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0)); + apm_->set_stream_analog_level(analog_level); + ASSERT_EQ(kNoErr, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + analog_level = apm_->recommended_stream_analog_level(); + + VerifyChannelsAreEqual(frame_.data.data(), frame_.samples_per_channel); + } + rewind(far_file_); + rewind(near_file_); + } +} + +TEST_F(ApmTest, SplittingFilter) { + // Verify the filter is not active through undistorted audio when: + // 1. No components are enabled... 
+ SetFrameTo(&frame_, 1000); + Int16FrameData frame_copy; + frame_copy.CopyFrom(frame_); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy)); + + // 2. Only the level estimator is enabled... + auto apm_config = apm_->GetConfig(); + SetFrameTo(&frame_, 1000); + frame_copy.CopyFrom(frame_); + apm_->ApplyConfig(apm_config); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy)); + apm_->ApplyConfig(apm_config); + + // Check the test is valid. We should have distortion from the filter + // when AEC is enabled (which won't affect the audio). + apm_config.echo_canceller.enabled = true; + apm_config.echo_canceller.mobile_mode = false; + apm_->ApplyConfig(apm_config); + frame_.samples_per_channel = 320; + frame_.num_channels = 2; + frame_.sample_rate_hz = 32000; + SetFrameTo(&frame_, 1000); + frame_copy.CopyFrom(frame_); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0)); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + EXPECT_FALSE(FrameDataAreEqual(frame_, frame_copy)); +} + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP +void ApmTest::ProcessDebugDump(absl::string_view in_filename, + absl::string_view out_filename, + Format format, + int max_size_bytes) { + TaskQueueForTest worker_queue("ApmTest_worker_queue"); + FILE* in_file = fopen(std::string(in_filename).c_str(), "rb"); + ASSERT_TRUE(in_file != NULL); + audioproc::Event event_msg; + bool first_init = true; + + while (ReadMessageFromFile(in_file, &event_msg)) { + if (event_msg.type() == audioproc::Event::INIT) { + const audioproc::Init msg = event_msg.init(); + int reverse_sample_rate = msg.sample_rate(); + if (msg.has_reverse_sample_rate()) { + reverse_sample_rate = msg.reverse_sample_rate(); + } + int output_sample_rate = msg.sample_rate(); + if (msg.has_output_sample_rate()) { + output_sample_rate = msg.output_sample_rate(); + } + + Init(msg.sample_rate(), output_sample_rate, reverse_sample_rate, + msg.num_input_channels(), msg.num_output_channels(), + msg.num_reverse_channels(), false); + if (first_init) { + // AttachAecDump() writes an additional init message. Don't start + // recording until after the first init to avoid the extra message. 
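+        // (The extra message would otherwise break the message-by-message
+        // comparison done in VerifyDebugDumpTest below.)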
+        auto aec_dump =
+            AecDumpFactory::Create(out_filename, max_size_bytes, &worker_queue);
+        EXPECT_TRUE(aec_dump);
+        apm_->AttachAecDump(std::move(aec_dump));
+        first_init = false;
+      }
+
+    } else if (event_msg.type() == audioproc::Event::REVERSE_STREAM) {
+      const audioproc::ReverseStream msg = event_msg.reverse_stream();
+
+      if (msg.channel_size() > 0) {
+        ASSERT_EQ(revframe_.num_channels,
+                  static_cast<size_t>(msg.channel_size()));
+        for (int i = 0; i < msg.channel_size(); ++i) {
+          memcpy(revfloat_cb_->channels()[i], msg.channel(i).data(),
+                 msg.channel(i).size());
+        }
+      } else {
+        memcpy(revframe_.data.data(), msg.data().data(), msg.data().size());
+        if (format == kFloatFormat) {
+          // We're using an int16 input file; convert to float.
+          ConvertToFloat(revframe_, revfloat_cb_.get());
+        }
+      }
+      AnalyzeReverseStreamChooser(format);
+
+    } else if (event_msg.type() == audioproc::Event::STREAM) {
+      const audioproc::Stream msg = event_msg.stream();
+      // ProcessStream could have changed this for the output frame.
+      frame_.num_channels = apm_->num_input_channels();
+
+      apm_->set_stream_analog_level(msg.applied_input_volume());
+      EXPECT_NOERR(apm_->set_stream_delay_ms(msg.delay()));
+      if (msg.has_keypress()) {
+        apm_->set_stream_key_pressed(msg.keypress());
+      } else {
+        apm_->set_stream_key_pressed(true);
+      }
+
+      if (msg.input_channel_size() > 0) {
+        ASSERT_EQ(frame_.num_channels,
+                  static_cast<size_t>(msg.input_channel_size()));
+        for (int i = 0; i < msg.input_channel_size(); ++i) {
+          memcpy(float_cb_->channels()[i], msg.input_channel(i).data(),
+                 msg.input_channel(i).size());
+        }
+      } else {
+        memcpy(frame_.data.data(), msg.input_data().data(),
+               msg.input_data().size());
+        if (format == kFloatFormat) {
+          // We're using an int16 input file; convert to float.
+          ConvertToFloat(frame_, float_cb_.get());
+        }
+      }
+      ProcessStreamChooser(format);
+    }
+  }
+  apm_->DetachAecDump();
+  fclose(in_file);
+}
+
+void ApmTest::VerifyDebugDumpTest(Format format) {
+  rtc::ScopedFakeClock fake_clock;
+  const std::string in_filename = test::ResourcePath("ref03", "aecdump");
+  std::string format_string;
+  switch (format) {
+    case kIntFormat:
+      format_string = "_int";
+      break;
+    case kFloatFormat:
+      format_string = "_float";
+      break;
+  }
+  const std::string ref_filename = test::TempFilename(
+      test::OutputPath(), std::string("ref") + format_string + "_aecdump");
+  const std::string out_filename = test::TempFilename(
+      test::OutputPath(), std::string("out") + format_string + "_aecdump");
+  const std::string limited_filename = test::TempFilename(
+      test::OutputPath(), std::string("limited") + format_string + "_aecdump");
+  const size_t logging_limit_bytes = 100000;
+  // We expect at least this many bytes in the created logfile.
+  const size_t logging_expected_bytes = 95000;
+  EnableAllComponents();
+  ProcessDebugDump(in_filename, ref_filename, format, -1);
+  ProcessDebugDump(ref_filename, out_filename, format, -1);
+  ProcessDebugDump(ref_filename, limited_filename, format,
+                   logging_limit_bytes);
+
+  FILE* ref_file = fopen(ref_filename.c_str(), "rb");
+  FILE* out_file = fopen(out_filename.c_str(), "rb");
+  FILE* limited_file = fopen(limited_filename.c_str(), "rb");
+  ASSERT_TRUE(ref_file != NULL);
+  ASSERT_TRUE(out_file != NULL);
+  ASSERT_TRUE(limited_file != NULL);
+  std::unique_ptr<uint8_t[]> ref_bytes;
+  std::unique_ptr<uint8_t[]> out_bytes;
+  std::unique_ptr<uint8_t[]> limited_bytes;
+
+  size_t ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes);
+  size_t out_size = ReadMessageBytesFromFile(out_file, &out_bytes);
+  size_t limited_size = ReadMessageBytesFromFile(limited_file, &limited_bytes);
+  size_t bytes_read = 0;
+  size_t bytes_read_limited = 0;
+  while (ref_size > 0 && out_size > 0) {
+    bytes_read += ref_size;
+    bytes_read_limited += limited_size;
+    EXPECT_EQ(ref_size, out_size);
+    EXPECT_GE(ref_size, limited_size);
+    EXPECT_TRUE(ExpectMessageEq(/*actual=*/{out_bytes.get(), out_size},
+                                /*expected=*/{ref_bytes.get(), ref_size}));
+    if (limited_size > 0) {
+      EXPECT_TRUE(
+          ExpectMessageEq(/*actual=*/{limited_bytes.get(), limited_size},
+                          /*expected=*/{ref_bytes.get(), ref_size}));
+    }
+    ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes);
+    out_size = ReadMessageBytesFromFile(out_file, &out_bytes);
+    limited_size = ReadMessageBytesFromFile(limited_file, &limited_bytes);
+  }
+  EXPECT_GT(bytes_read, 0u);
+  EXPECT_GT(bytes_read_limited, logging_expected_bytes);
+  EXPECT_LE(bytes_read_limited, logging_limit_bytes);
+  EXPECT_NE(0, feof(ref_file));
+  EXPECT_NE(0, feof(out_file));
+  EXPECT_NE(0, feof(limited_file));
+  ASSERT_EQ(0, fclose(ref_file));
+  ASSERT_EQ(0, fclose(out_file));
+  ASSERT_EQ(0, fclose(limited_file));
+  remove(ref_filename.c_str());
+  remove(out_filename.c_str());
+  remove(limited_filename.c_str());
+}
+
+TEST_F(ApmTest, VerifyDebugDumpInt) {
+  VerifyDebugDumpTest(kIntFormat);
+}
+
+TEST_F(ApmTest, VerifyDebugDumpFloat) {
+  VerifyDebugDumpTest(kFloatFormat);
+}
+#endif
+
+// TODO(andrew): expand test to verify output.
+TEST_F(ApmTest, DebugDump) {
+  TaskQueueForTest worker_queue("ApmTest_worker_queue");
+  const std::string filename =
+      test::TempFilename(test::OutputPath(), "debug_aec");
+  {
+    auto aec_dump = AecDumpFactory::Create("", -1, &worker_queue);
+    EXPECT_FALSE(aec_dump);
+  }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+  // Stopping without having started should be OK.
+  apm_->DetachAecDump();
+
+  auto aec_dump = AecDumpFactory::Create(filename, -1, &worker_queue);
+  EXPECT_TRUE(aec_dump);
+  apm_->AttachAecDump(std::move(aec_dump));
+  EXPECT_EQ(apm_->kNoError,
+            apm_->ProcessStream(
+                frame_.data.data(),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                frame_.data.data()));
+  EXPECT_EQ(apm_->kNoError,
+            apm_->ProcessReverseStream(
+                revframe_.data.data(),
+                StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+                StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+                revframe_.data.data()));
+  apm_->DetachAecDump();
+
+  // Verify the file has been written.
+  FILE* fid = fopen(filename.c_str(), "r");
+  ASSERT_TRUE(fid != NULL);
+
+  // Clean it up.
+  ASSERT_EQ(0, fclose(fid));
+  ASSERT_EQ(0, remove(filename.c_str()));
+#else
+  // Verify the file has NOT been written.
+ ASSERT_TRUE(fopen(filename.c_str(), "r") == NULL); +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP +} + +// TODO(andrew): expand test to verify output. +TEST_F(ApmTest, DebugDumpFromFileHandle) { + TaskQueueForTest worker_queue("ApmTest_worker_queue"); + + const std::string filename = + test::TempFilename(test::OutputPath(), "debug_aec"); + FileWrapper f = FileWrapper::OpenWriteOnly(filename); + ASSERT_TRUE(f.is_open()); + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP + // Stopping without having started should be OK. + apm_->DetachAecDump(); + + auto aec_dump = AecDumpFactory::Create(std::move(f), -1, &worker_queue); + EXPECT_TRUE(aec_dump); + apm_->AttachAecDump(std::move(aec_dump)); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessReverseStream( + revframe_.data.data(), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels), + revframe_.data.data())); + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream( + frame_.data.data(), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + StreamConfig(frame_.sample_rate_hz, frame_.num_channels), + frame_.data.data())); + apm_->DetachAecDump(); + + // Verify the file has been written. + FILE* fid = fopen(filename.c_str(), "r"); + ASSERT_TRUE(fid != NULL); + + // Clean it up. + ASSERT_EQ(0, fclose(fid)); + ASSERT_EQ(0, remove(filename.c_str())); +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP +} + +// TODO(andrew): Add a test to process a few frames with different combinations +// of enabled components. + +TEST_F(ApmTest, Process) { + GOOGLE_PROTOBUF_VERIFY_VERSION; + audioproc::OutputData ref_data; + + if (!absl::GetFlag(FLAGS_write_apm_ref_data)) { + OpenFileAndReadMessage(ref_filename_, &ref_data); + } else { + const int kChannels[] = {1, 2}; + // Write the desired tests to the protobuf reference file. + for (size_t i = 0; i < arraysize(kChannels); i++) { + for (size_t j = 0; j < arraysize(kChannels); j++) { + for (int sample_rate_hz : AudioProcessing::kNativeSampleRatesHz) { + audioproc::Test* test = ref_data.add_test(); + test->set_num_reverse_channels(kChannels[i]); + test->set_num_input_channels(kChannels[j]); + test->set_num_output_channels(kChannels[j]); + test->set_sample_rate(sample_rate_hz); + test->set_use_aec_extended_filter(false); + } + } + } +#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) + // To test the extended filter mode. + audioproc::Test* test = ref_data.add_test(); + test->set_num_reverse_channels(2); + test->set_num_input_channels(2); + test->set_num_output_channels(2); + test->set_sample_rate(AudioProcessing::kSampleRate32kHz); + test->set_use_aec_extended_filter(true); +#endif + } + + for (int i = 0; i < ref_data.test_size(); i++) { + printf("Running test %d of %d...\n", i + 1, ref_data.test_size()); + + audioproc::Test* test = ref_data.mutable_test(i); + // TODO(ajm): We no longer allow different input and output channels. Skip + // these tests for now, but they should be removed from the set. 
+    if (test->num_input_channels() != test->num_output_channels())
+      continue;
+
+    apm_ = AudioProcessingBuilderForTesting()
+               .SetEchoDetector(CreateEchoDetector())
+               .Create();
+    AudioProcessing::Config apm_config = apm_->GetConfig();
+    apm_config.gain_controller1.analog_gain_controller.enabled = false;
+    apm_->ApplyConfig(apm_config);
+
+    EnableAllComponents();
+
+    Init(test->sample_rate(), test->sample_rate(), test->sample_rate(),
+         static_cast<size_t>(test->num_input_channels()),
+         static_cast<size_t>(test->num_output_channels()),
+         static_cast<size_t>(test->num_reverse_channels()), true);
+
+    int frame_count = 0;
+    int analog_level = 127;
+    int analog_level_average = 0;
+    int max_output_average = 0;
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+    int stats_index = 0;
+#endif
+
+    while (ReadFrame(far_file_, &revframe_) && ReadFrame(near_file_, &frame_)) {
+      EXPECT_EQ(
+          apm_->kNoError,
+          apm_->ProcessReverseStream(
+              revframe_.data.data(),
+              StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+              StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
+              revframe_.data.data()));
+
+      EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
+      apm_->set_stream_analog_level(analog_level);
+
+      EXPECT_EQ(apm_->kNoError,
+                apm_->ProcessStream(
+                    frame_.data.data(),
+                    StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                    StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                    frame_.data.data()));
+
+      // Ensure the frame was downmixed properly.
+      EXPECT_EQ(static_cast<size_t>(test->num_output_channels()),
+                frame_.num_channels);
+
+      max_output_average += MaxAudioFrame(frame_);
+
+      analog_level = apm_->recommended_stream_analog_level();
+      analog_level_average += analog_level;
+      AudioProcessingStats stats = apm_->GetStatistics();
+
+      size_t frame_size = frame_.samples_per_channel * frame_.num_channels;
+      size_t write_count =
+          fwrite(frame_.data.data(), sizeof(int16_t), frame_size, out_file_);
+      ASSERT_EQ(frame_size, write_count);
+
+      // Reset in case of downmixing.
+      frame_.num_channels = static_cast<size_t>(test->num_input_channels());
+      frame_count++;
+
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+      const int kStatsAggregationFrameNum = 100;  // 1 second.
+      if (frame_count % kStatsAggregationFrameNum == 0) {
+        // Get echo and delay metrics.
+        AudioProcessingStats stats2 = apm_->GetStatistics();
+
+        // Echo metrics.
+        const float echo_return_loss = stats2.echo_return_loss.value_or(-1.0f);
+        const float echo_return_loss_enhancement =
+            stats2.echo_return_loss_enhancement.value_or(-1.0f);
+        const float residual_echo_likelihood =
+            stats2.residual_echo_likelihood.value_or(-1.0f);
+        const float residual_echo_likelihood_recent_max =
+            stats2.residual_echo_likelihood_recent_max.value_or(-1.0f);
+
+        if (!absl::GetFlag(FLAGS_write_apm_ref_data)) {
+          const audioproc::Test::EchoMetrics& reference =
+              test->echo_metrics(stats_index);
+          constexpr float kEpsilon = 0.01f;
+          EXPECT_NEAR(echo_return_loss, reference.echo_return_loss(), kEpsilon);
+          EXPECT_NEAR(echo_return_loss_enhancement,
+                      reference.echo_return_loss_enhancement(), kEpsilon);
+          EXPECT_NEAR(residual_echo_likelihood,
+                      reference.residual_echo_likelihood(), kEpsilon);
+          EXPECT_NEAR(residual_echo_likelihood_recent_max,
+                      reference.residual_echo_likelihood_recent_max(),
+                      kEpsilon);
+          ++stats_index;
+        } else {
+          audioproc::Test::EchoMetrics* message_echo = test->add_echo_metrics();
+          message_echo->set_echo_return_loss(echo_return_loss);
+          message_echo->set_echo_return_loss_enhancement(
+              echo_return_loss_enhancement);
+          message_echo->set_residual_echo_likelihood(residual_echo_likelihood);
+          message_echo->set_residual_echo_likelihood_recent_max(
+              residual_echo_likelihood_recent_max);
+        }
+      }
+#endif  // defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE).
+    }
+    max_output_average /= frame_count;
+    analog_level_average /= frame_count;
+
+    if (!absl::GetFlag(FLAGS_write_apm_ref_data)) {
+      const int kIntNear = 1;
+      // All numbers are consistently higher on N7 compared to the reference
+      // data.
+      // TODO(bjornv): If we start getting more of these offsets on Android we
+      // should consider a different approach. Either using one slack for all,
+      // or generate a separate android reference.
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
+      const int kMaxOutputAverageOffset = 9;
+      const int kMaxOutputAverageNear = 26;
+#else
+      const int kMaxOutputAverageOffset = 0;
+      const int kMaxOutputAverageNear = kIntNear;
+#endif
+      EXPECT_NEAR(test->analog_level_average(), analog_level_average, kIntNear);
+      EXPECT_NEAR(test->max_output_average(),
+                  max_output_average - kMaxOutputAverageOffset,
+                  kMaxOutputAverageNear);
+    } else {
+      test->set_analog_level_average(analog_level_average);
+      test->set_max_output_average(max_output_average);
+    }
+
+    rewind(far_file_);
+    rewind(near_file_);
+  }
+
+  if (absl::GetFlag(FLAGS_write_apm_ref_data)) {
+    OpenFileAndWriteMessage(ref_filename_, ref_data);
+  }
+}
+
+// Compares the reference and test arrays over a region around the expected
+// delay. Finds the highest SNR in that region and adds the variance and
+// squared error results to the supplied accumulators.
+void UpdateBestSNR(const float* ref,
+                   const float* test,
+                   size_t length,
+                   int expected_delay,
+                   double* variance_acc,
+                   double* sq_error_acc) {
+  RTC_CHECK_LT(expected_delay, length)
+      << "delay greater than signal length, cannot compute SNR";
+  double best_snr = std::numeric_limits<double>::min();
+  double best_variance = 0;
+  double best_sq_error = 0;
+  // Search over a region of nine samples around the expected delay.
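+  // (For each candidate delay the SNR proxy is variance / sq_error; the
+  // caller reports 10 * log10(variance / sq_error) dB, so an error energy
+  // 100x below the signal energy corresponds to 20 dB.)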
+  for (int delay = std::max(expected_delay - 4, 0); delay <= expected_delay + 4;
+       ++delay) {
+    double sq_error = 0;
+    double variance = 0;
+    for (size_t i = 0; i < length - delay; ++i) {
+      double error = test[i + delay] - ref[i];
+      sq_error += error * error;
+      variance += ref[i] * ref[i];
+    }
+
+    if (sq_error == 0) {
+      *variance_acc += variance;
+      return;
+    }
+    double snr = variance / sq_error;
+    if (snr > best_snr) {
+      best_snr = snr;
+      best_variance = variance;
+      best_sq_error = sq_error;
+    }
+  }
+
+  *variance_acc += best_variance;
+  *sq_error_acc += best_sq_error;
+}
+
+// Used to test a multitude of sample rate and channel combinations. It works
+// by first producing a set of reference files (in SetUpTestSuite) that are
+// assumed to be correct, as the used parameters are verified by other tests
+// in this collection. Primarily the reference files are all produced at
+// "native" rates which do not involve any resampling.
+
+// Each test pass produces an output file with a particular format. The output
+// is matched against the reference file closest to its internal processing
+// format. If necessary the output is resampled back to its processing format.
+// Due to the resampling distortion, we don't expect identical results, but
+// enforce SNR thresholds which vary depending on the format. 0 is a special
+// case SNR which corresponds to inf, or zero error.
+typedef std::tuple<int, int, int, int, double, double> AudioProcessingTestData;
+class AudioProcessingTest
+    : public ::testing::TestWithParam<AudioProcessingTestData> {
+ public:
+  AudioProcessingTest()
+      : input_rate_(std::get<0>(GetParam())),
+        output_rate_(std::get<1>(GetParam())),
+        reverse_input_rate_(std::get<2>(GetParam())),
+        reverse_output_rate_(std::get<3>(GetParam())),
+        expected_snr_(std::get<4>(GetParam())),
+        expected_reverse_snr_(std::get<5>(GetParam())) {}
+
+  virtual ~AudioProcessingTest() {}
+
+  static void SetUpTestSuite() {
+    // Create all needed output reference files.
+    const size_t kNumChannels[] = {1, 2};
+    for (size_t i = 0; i < arraysize(kProcessSampleRates); ++i) {
+      for (size_t j = 0; j < arraysize(kNumChannels); ++j) {
+        for (size_t k = 0; k < arraysize(kNumChannels); ++k) {
+          // The reference files always have matching input and output
+          // channels.
+          ProcessFormat(kProcessSampleRates[i], kProcessSampleRates[i],
+                        kProcessSampleRates[i], kProcessSampleRates[i],
+                        kNumChannels[j], kNumChannels[j], kNumChannels[k],
+                        kNumChannels[k], "ref");
+        }
+      }
+    }
+  }
+
+  void TearDown() {
+    // Remove "out" files after each test.
+    ClearTempOutFiles();
+  }
+
+  static void TearDownTestSuite() { ClearTempFiles(); }
+
+  // Runs a process pass on files with the given parameters and dumps the
+  // output to a file specified with `output_file_prefix`. Both forward and
+  // reverse output streams are dumped.
+  static void ProcessFormat(int input_rate,
+                            int output_rate,
+                            int reverse_input_rate,
+                            int reverse_output_rate,
+                            size_t num_input_channels,
+                            size_t num_output_channels,
+                            size_t num_reverse_input_channels,
+                            size_t num_reverse_output_channels,
+                            absl::string_view output_file_prefix) {
+    AudioProcessing::Config apm_config;
+    apm_config.gain_controller1.analog_gain_controller.enabled = false;
+    rtc::scoped_refptr<AudioProcessing> ap =
+        AudioProcessingBuilderForTesting().SetConfig(apm_config).Create();
+
+    EnableAllAPComponents(ap.get());
+
+    ProcessingConfig processing_config = {
+        {{input_rate, num_input_channels},
+         {output_rate, num_output_channels},
+         {reverse_input_rate, num_reverse_input_channels},
+         {reverse_output_rate, num_reverse_output_channels}}};
+    ap->Initialize(processing_config);
+
+    FILE* far_file =
+        fopen(ResourceFilePath("far", reverse_input_rate).c_str(), "rb");
+    FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb");
+    FILE* out_file = fopen(
+        OutputFilePath(
+            output_file_prefix, input_rate, output_rate, reverse_input_rate,
+            reverse_output_rate, num_input_channels, num_output_channels,
+            num_reverse_input_channels, num_reverse_output_channels, kForward)
+            .c_str(),
+        "wb");
+    FILE* rev_out_file = fopen(
+        OutputFilePath(
+            output_file_prefix, input_rate, output_rate, reverse_input_rate,
+            reverse_output_rate, num_input_channels, num_output_channels,
+            num_reverse_input_channels, num_reverse_output_channels, kReverse)
+            .c_str(),
+        "wb");
+    ASSERT_TRUE(far_file != NULL);
+    ASSERT_TRUE(near_file != NULL);
+    ASSERT_TRUE(out_file != NULL);
+    ASSERT_TRUE(rev_out_file != NULL);
+
+    ChannelBuffer<float> fwd_cb(AudioProcessing::GetFrameSize(input_rate),
+                                num_input_channels);
+    ChannelBuffer<float> rev_cb(
+        AudioProcessing::GetFrameSize(reverse_input_rate),
+        num_reverse_input_channels);
+    ChannelBuffer<float> out_cb(AudioProcessing::GetFrameSize(output_rate),
+                                num_output_channels);
+    ChannelBuffer<float> rev_out_cb(
+        AudioProcessing::GetFrameSize(reverse_output_rate),
+        num_reverse_output_channels);
+
+    // Temporary buffers.
+    const int max_length =
+        2 * std::max(std::max(out_cb.num_frames(), rev_out_cb.num_frames()),
+                     std::max(fwd_cb.num_frames(), rev_cb.num_frames()));
+    std::unique_ptr<float[]> float_data(new float[max_length]);
+    std::unique_ptr<int16_t[]> int_data(new int16_t[max_length]);
+
+    int analog_level = 127;
+    while (ReadChunk(far_file, int_data.get(), float_data.get(), &rev_cb) &&
+           ReadChunk(near_file, int_data.get(), float_data.get(), &fwd_cb)) {
+      EXPECT_NOERR(ap->ProcessReverseStream(
+          rev_cb.channels(), processing_config.reverse_input_stream(),
+          processing_config.reverse_output_stream(), rev_out_cb.channels()));
+
+      EXPECT_NOERR(ap->set_stream_delay_ms(0));
+      ap->set_stream_analog_level(analog_level);
+
+      EXPECT_NOERR(ap->ProcessStream(
+          fwd_cb.channels(), StreamConfig(input_rate, num_input_channels),
+          StreamConfig(output_rate, num_output_channels), out_cb.channels()));
+
+      // Dump forward output to file.
+      Interleave(out_cb.channels(), out_cb.num_frames(), out_cb.num_channels(),
+                 float_data.get());
+      size_t out_length = out_cb.num_channels() * out_cb.num_frames();
+
+      ASSERT_EQ(out_length, fwrite(float_data.get(), sizeof(float_data[0]),
+                                   out_length, out_file));
+
+      // Dump reverse output to file.
+      Interleave(rev_out_cb.channels(), rev_out_cb.num_frames(),
+                 rev_out_cb.num_channels(), float_data.get());
+      size_t rev_out_length =
+          rev_out_cb.num_channels() * rev_out_cb.num_frames();
+
+      ASSERT_EQ(rev_out_length, fwrite(float_data.get(), sizeof(float_data[0]),
+                                       rev_out_length, rev_out_file));
+
+      analog_level = ap->recommended_stream_analog_level();
+    }
+    fclose(far_file);
+    fclose(near_file);
+    fclose(out_file);
+    fclose(rev_out_file);
+  }
+
+ protected:
+  int input_rate_;
+  int output_rate_;
+  int reverse_input_rate_;
+  int reverse_output_rate_;
+  double expected_snr_;
+  double expected_reverse_snr_;
+};
+
+TEST_P(AudioProcessingTest, Formats) {
+  struct ChannelFormat {
+    int num_input;
+    int num_output;
+    int num_reverse_input;
+    int num_reverse_output;
+  };
+  ChannelFormat cf[] = {
+      {1, 1, 1, 1}, {1, 1, 2, 1}, {2, 1, 1, 1},
+      {2, 1, 2, 1}, {2, 2, 1, 1}, {2, 2, 2, 2},
+  };
+
+  for (size_t i = 0; i < arraysize(cf); ++i) {
+    ProcessFormat(input_rate_, output_rate_, reverse_input_rate_,
+                  reverse_output_rate_, cf[i].num_input, cf[i].num_output,
+                  cf[i].num_reverse_input, cf[i].num_reverse_output, "out");
+
+    // Verify output for both directions.
+    std::vector<StreamDirection> stream_directions;
+    stream_directions.push_back(kForward);
+    stream_directions.push_back(kReverse);
+    for (StreamDirection file_direction : stream_directions) {
+      const int in_rate = file_direction ? reverse_input_rate_ : input_rate_;
+      const int out_rate = file_direction ? reverse_output_rate_ : output_rate_;
+      const int out_num =
+          file_direction ? cf[i].num_reverse_output : cf[i].num_output;
+      const double expected_snr =
+          file_direction ? expected_reverse_snr_ : expected_snr_;
+
+      const int min_ref_rate = std::min(in_rate, out_rate);
+      int ref_rate;
+      if (min_ref_rate > 32000) {
+        ref_rate = 48000;
+      } else if (min_ref_rate > 16000) {
+        ref_rate = 32000;
+      } else {
+        ref_rate = 16000;
+      }
+
+      FILE* out_file = fopen(
+          OutputFilePath("out", input_rate_, output_rate_, reverse_input_rate_,
+                         reverse_output_rate_, cf[i].num_input,
+                         cf[i].num_output, cf[i].num_reverse_input,
+                         cf[i].num_reverse_output, file_direction)
+              .c_str(),
+          "rb");
+      // The reference files always have matching input and output channels.
+      FILE* ref_file =
+          fopen(OutputFilePath("ref", ref_rate, ref_rate, ref_rate, ref_rate,
+                               cf[i].num_output, cf[i].num_output,
+                               cf[i].num_reverse_output,
+                               cf[i].num_reverse_output, file_direction)
+                    .c_str(),
+                "rb");
+      ASSERT_TRUE(out_file != NULL);
+      ASSERT_TRUE(ref_file != NULL);
+
+      const size_t ref_length =
+          AudioProcessing::GetFrameSize(ref_rate) * out_num;
+      const size_t out_length =
+          AudioProcessing::GetFrameSize(out_rate) * out_num;
+      // Data from the reference file.
+      std::unique_ptr<float[]> ref_data(new float[ref_length]);
+      // Data from the output file.
+      std::unique_ptr<float[]> out_data(new float[out_length]);
+      // Data from the resampled output, in case the reference and output
+      // rates don't match.
+      std::unique_ptr<float[]> cmp_data(new float[ref_length]);
+
+      PushResampler<float> resampler;
+      resampler.InitializeIfNeeded(out_rate, ref_rate, out_num);
+
+      // Compute the resampling delay of the output relative to the reference,
+      // to find the region over which we should search for the best SNR.
+      float expected_delay_sec = 0;
+      if (in_rate != ref_rate) {
+        // Input resampling delay.
+        expected_delay_sec +=
+            PushSincResampler::AlgorithmicDelaySeconds(in_rate);
+      }
+      if (out_rate != ref_rate) {
+        // Output resampling delay.
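+        // (e.g. with out_rate = 48000 and ref_rate = 16000 both of the terms
+        // below apply; the total is converted to an interleaved sample count
+        // at ref_rate further down.)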
+        expected_delay_sec +=
+            PushSincResampler::AlgorithmicDelaySeconds(ref_rate);
+        // Delay of converting the output back to its processing rate for
+        // testing.
+        expected_delay_sec +=
+            PushSincResampler::AlgorithmicDelaySeconds(out_rate);
+      }
+      // The delay is multiplied by the number of channels because
+      // UpdateBestSNR() computes the SNR over interleaved data without taking
+      // channels into account.
+      int expected_delay =
+          std::floor(expected_delay_sec * ref_rate + 0.5f) * out_num;
+
+      double variance = 0;
+      double sq_error = 0;
+      while (fread(out_data.get(), sizeof(out_data[0]), out_length, out_file) &&
+             fread(ref_data.get(), sizeof(ref_data[0]), ref_length, ref_file)) {
+        float* out_ptr = out_data.get();
+        if (out_rate != ref_rate) {
+          // Resample the output back to its internal processing rate if
+          // necessary.
+          ASSERT_EQ(ref_length,
+                    static_cast<size_t>(resampler.Resample(
+                        out_ptr, out_length, cmp_data.get(), ref_length)));
+          out_ptr = cmp_data.get();
+        }
+
+        // Update the `sq_error` and `variance` accumulators with the highest
+        // SNR of reference vs output.
+        UpdateBestSNR(ref_data.get(), out_ptr, ref_length, expected_delay,
+                      &variance, &sq_error);
+      }
+
+      std::cout << "(" << input_rate_ << ", " << output_rate_ << ", "
+                << reverse_input_rate_ << ", " << reverse_output_rate_ << ", "
+                << cf[i].num_input << ", " << cf[i].num_output << ", "
+                << cf[i].num_reverse_input << ", " << cf[i].num_reverse_output
+                << ", " << file_direction << "): ";
+      if (sq_error > 0) {
+        double snr = 10 * log10(variance / sq_error);
+        EXPECT_GE(snr, expected_snr);
+        EXPECT_NE(0, expected_snr);
+        std::cout << "SNR=" << snr << " dB" << std::endl;
+      } else {
+        std::cout << "SNR=inf dB" << std::endl;
+      }
+
+      fclose(out_file);
+      fclose(ref_file);
+    }
+  }
+}
+
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+INSTANTIATE_TEST_SUITE_P(
+    CommonFormats,
+    AudioProcessingTest,
+    // Internal processing rates and the particularly common sample rate 44100
+    // Hz are tested in a grid of combinations (capture in, render in, out).
+ ::testing::Values(std::make_tuple(48000, 48000, 48000, 48000, 0, 0), + std::make_tuple(48000, 48000, 32000, 48000, 40, 30), + std::make_tuple(48000, 48000, 16000, 48000, 40, 20), + std::make_tuple(48000, 44100, 48000, 44100, 20, 20), + std::make_tuple(48000, 44100, 32000, 44100, 20, 15), + std::make_tuple(48000, 44100, 16000, 44100, 20, 15), + std::make_tuple(48000, 32000, 48000, 32000, 30, 35), + std::make_tuple(48000, 32000, 32000, 32000, 30, 0), + std::make_tuple(48000, 32000, 16000, 32000, 30, 20), + std::make_tuple(48000, 16000, 48000, 16000, 25, 20), + std::make_tuple(48000, 16000, 32000, 16000, 25, 20), + std::make_tuple(48000, 16000, 16000, 16000, 25, 0), + + std::make_tuple(44100, 48000, 48000, 48000, 30, 0), + std::make_tuple(44100, 48000, 32000, 48000, 30, 30), + std::make_tuple(44100, 48000, 16000, 48000, 30, 20), + std::make_tuple(44100, 44100, 48000, 44100, 20, 20), + std::make_tuple(44100, 44100, 32000, 44100, 20, 15), + std::make_tuple(44100, 44100, 16000, 44100, 20, 15), + std::make_tuple(44100, 32000, 48000, 32000, 30, 35), + std::make_tuple(44100, 32000, 32000, 32000, 30, 0), + std::make_tuple(44100, 32000, 16000, 32000, 30, 20), + std::make_tuple(44100, 16000, 48000, 16000, 25, 20), + std::make_tuple(44100, 16000, 32000, 16000, 25, 20), + std::make_tuple(44100, 16000, 16000, 16000, 25, 0), + + std::make_tuple(32000, 48000, 48000, 48000, 15, 0), + std::make_tuple(32000, 48000, 32000, 48000, 15, 30), + std::make_tuple(32000, 48000, 16000, 48000, 15, 20), + std::make_tuple(32000, 44100, 48000, 44100, 19, 20), + std::make_tuple(32000, 44100, 32000, 44100, 19, 15), + std::make_tuple(32000, 44100, 16000, 44100, 19, 15), + std::make_tuple(32000, 32000, 48000, 32000, 40, 35), + std::make_tuple(32000, 32000, 32000, 32000, 0, 0), + std::make_tuple(32000, 32000, 16000, 32000, 39, 20), + std::make_tuple(32000, 16000, 48000, 16000, 25, 20), + std::make_tuple(32000, 16000, 32000, 16000, 25, 20), + std::make_tuple(32000, 16000, 16000, 16000, 25, 0), + + std::make_tuple(16000, 48000, 48000, 48000, 9, 0), + std::make_tuple(16000, 48000, 32000, 48000, 9, 30), + std::make_tuple(16000, 48000, 16000, 48000, 9, 20), + std::make_tuple(16000, 44100, 48000, 44100, 15, 20), + std::make_tuple(16000, 44100, 32000, 44100, 15, 15), + std::make_tuple(16000, 44100, 16000, 44100, 15, 15), + std::make_tuple(16000, 32000, 48000, 32000, 25, 35), + std::make_tuple(16000, 32000, 32000, 32000, 25, 0), + std::make_tuple(16000, 32000, 16000, 32000, 25, 20), + std::make_tuple(16000, 16000, 48000, 16000, 39, 20), + std::make_tuple(16000, 16000, 32000, 16000, 39, 20), + std::make_tuple(16000, 16000, 16000, 16000, 0, 0), + + // Other sample rates are not tested exhaustively, to keep + // the test runtime manageable. + // + // Testing most other sample rates logged by Chrome UMA: + // - WebRTC.AudioInputSampleRate + // - WebRTC.AudioOutputSampleRate + // ApmConfiguration.HandlingOfRateCombinations covers + // remaining sample rates. 
+ std::make_tuple(192000, 192000, 48000, 192000, 20, 40), + std::make_tuple(176400, 176400, 48000, 176400, 20, 35), + std::make_tuple(96000, 96000, 48000, 96000, 20, 40), + std::make_tuple(88200, 88200, 48000, 88200, 20, 20), + std::make_tuple(44100, 44100, 48000, 44100, 20, 20))); + +#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) +INSTANTIATE_TEST_SUITE_P( + CommonFormats, + AudioProcessingTest, + ::testing::Values(std::make_tuple(48000, 48000, 48000, 48000, 19, 0), + std::make_tuple(48000, 48000, 32000, 48000, 19, 30), + std::make_tuple(48000, 48000, 16000, 48000, 19, 20), + std::make_tuple(48000, 44100, 48000, 44100, 15, 20), + std::make_tuple(48000, 44100, 32000, 44100, 15, 15), + std::make_tuple(48000, 44100, 16000, 44100, 15, 15), + std::make_tuple(48000, 32000, 48000, 32000, 19, 35), + std::make_tuple(48000, 32000, 32000, 32000, 19, 0), + std::make_tuple(48000, 32000, 16000, 32000, 19, 20), + std::make_tuple(48000, 16000, 48000, 16000, 20, 20), + std::make_tuple(48000, 16000, 32000, 16000, 20, 20), + std::make_tuple(48000, 16000, 16000, 16000, 20, 0), + + std::make_tuple(44100, 48000, 48000, 48000, 15, 0), + std::make_tuple(44100, 48000, 32000, 48000, 15, 30), + std::make_tuple(44100, 48000, 16000, 48000, 15, 20), + std::make_tuple(44100, 44100, 48000, 44100, 15, 20), + std::make_tuple(44100, 44100, 32000, 44100, 15, 15), + std::make_tuple(44100, 44100, 16000, 44100, 15, 15), + std::make_tuple(44100, 32000, 48000, 32000, 18, 35), + std::make_tuple(44100, 32000, 32000, 32000, 18, 0), + std::make_tuple(44100, 32000, 16000, 32000, 18, 20), + std::make_tuple(44100, 16000, 48000, 16000, 19, 20), + std::make_tuple(44100, 16000, 32000, 16000, 19, 20), + std::make_tuple(44100, 16000, 16000, 16000, 19, 0), + + std::make_tuple(32000, 48000, 48000, 48000, 17, 0), + std::make_tuple(32000, 48000, 32000, 48000, 17, 30), + std::make_tuple(32000, 48000, 16000, 48000, 17, 20), + std::make_tuple(32000, 44100, 48000, 44100, 20, 20), + std::make_tuple(32000, 44100, 32000, 44100, 20, 15), + std::make_tuple(32000, 44100, 16000, 44100, 20, 15), + std::make_tuple(32000, 32000, 48000, 32000, 27, 35), + std::make_tuple(32000, 32000, 32000, 32000, 0, 0), + std::make_tuple(32000, 32000, 16000, 32000, 30, 20), + std::make_tuple(32000, 16000, 48000, 16000, 20, 20), + std::make_tuple(32000, 16000, 32000, 16000, 20, 20), + std::make_tuple(32000, 16000, 16000, 16000, 20, 0), + + std::make_tuple(16000, 48000, 48000, 48000, 11, 0), + std::make_tuple(16000, 48000, 32000, 48000, 11, 30), + std::make_tuple(16000, 48000, 16000, 48000, 11, 20), + std::make_tuple(16000, 44100, 48000, 44100, 15, 20), + std::make_tuple(16000, 44100, 32000, 44100, 15, 15), + std::make_tuple(16000, 44100, 16000, 44100, 15, 15), + std::make_tuple(16000, 32000, 48000, 32000, 24, 35), + std::make_tuple(16000, 32000, 32000, 32000, 24, 0), + std::make_tuple(16000, 32000, 16000, 32000, 25, 20), + std::make_tuple(16000, 16000, 48000, 16000, 28, 20), + std::make_tuple(16000, 16000, 32000, 16000, 28, 20), + std::make_tuple(16000, 16000, 16000, 16000, 0, 0), + + std::make_tuple(192000, 192000, 48000, 192000, 20, 40), + std::make_tuple(176400, 176400, 48000, 176400, 20, 35), + std::make_tuple(96000, 96000, 48000, 96000, 20, 40), + std::make_tuple(88200, 88200, 48000, 88200, 20, 20), + std::make_tuple(44100, 44100, 48000, 44100, 20, 20))); +#endif + +// Produces a scoped trace debug output. 
+std::string ProduceDebugText(int render_input_sample_rate_hz,
+                             int render_output_sample_rate_hz,
+                             int capture_input_sample_rate_hz,
+                             int capture_output_sample_rate_hz,
+                             size_t render_input_num_channels,
+                             size_t render_output_num_channels,
+                             size_t capture_input_num_channels,
+                             size_t capture_output_num_channels) {
+  rtc::StringBuilder ss;
+  ss << "Sample rates:"
+        "\n Render input: "
+     << render_input_sample_rate_hz
+     << " Hz"
+        "\n Render output: "
+     << render_output_sample_rate_hz
+     << " Hz"
+        "\n Capture input: "
+     << capture_input_sample_rate_hz
+     << " Hz"
+        "\n Capture output: "
+     << capture_output_sample_rate_hz
+     << " Hz"
+        "\nNumber of channels:"
+        "\n Render input: "
+     << render_input_num_channels
+     << "\n Render output: " << render_output_num_channels
+     << "\n Capture input: " << capture_input_num_channels
+     << "\n Capture output: " << capture_output_num_channels;
+  return ss.Release();
+}
+
+// Validates that running the audio processing module using various
+// combinations of sample rates and number of channels works as intended.
+void RunApmRateAndChannelTest(
+    rtc::ArrayView<const int> sample_rates_hz,
+    rtc::ArrayView<const int> render_channel_counts,
+    rtc::ArrayView<const int> capture_channel_counts) {
+  webrtc::AudioProcessing::Config apm_config;
+  apm_config.pipeline.multi_channel_render = true;
+  apm_config.pipeline.multi_channel_capture = true;
+  apm_config.echo_canceller.enabled = true;
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting().SetConfig(apm_config).Create();
+
+  StreamConfig render_input_stream_config;
+  StreamConfig render_output_stream_config;
+  StreamConfig capture_input_stream_config;
+  StreamConfig capture_output_stream_config;
+
+  std::vector<float> render_input_frame_channels;
+  std::vector<float*> render_input_frame;
+  std::vector<float> render_output_frame_channels;
+  std::vector<float*> render_output_frame;
+  std::vector<float> capture_input_frame_channels;
+  std::vector<float*> capture_input_frame;
+  std::vector<float> capture_output_frame_channels;
+  std::vector<float*> capture_output_frame;
+
+  for (auto render_input_sample_rate_hz : sample_rates_hz) {
+    for (auto render_output_sample_rate_hz : sample_rates_hz) {
+      for (auto capture_input_sample_rate_hz : sample_rates_hz) {
+        for (auto capture_output_sample_rate_hz : sample_rates_hz) {
+          for (size_t render_input_num_channels : render_channel_counts) {
+            for (size_t capture_input_num_channels : capture_channel_counts) {
+              size_t render_output_num_channels = render_input_num_channels;
+              size_t capture_output_num_channels = capture_input_num_channels;
+              auto populate_audio_frame = [](int sample_rate_hz,
+                                             size_t num_channels,
+                                             StreamConfig* cfg,
+                                             std::vector<float>* channels_data,
+                                             std::vector<float*>* frame_data) {
+                cfg->set_sample_rate_hz(sample_rate_hz);
+                cfg->set_num_channels(num_channels);
+
+                size_t max_frame_size =
+                    AudioProcessing::GetFrameSize(sample_rate_hz);
+                channels_data->resize(num_channels * max_frame_size);
+                std::fill(channels_data->begin(), channels_data->end(), 0.5f);
+                frame_data->resize(num_channels);
+                for (size_t channel = 0; channel < num_channels; ++channel) {
+                  (*frame_data)[channel] =
+                      &(*channels_data)[channel * max_frame_size];
+                }
+              };
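+              // (Each pointer in *frame_data addresses one channel inside the
+              // single contiguous backing buffer; at 48000 Hz GetFrameSize()
+              // is 480 samples, i.e. one 10 ms frame per channel.)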
+              populate_audio_frame(
+                  render_input_sample_rate_hz, render_input_num_channels,
+                  &render_input_stream_config, &render_input_frame_channels,
+                  &render_input_frame);
+              populate_audio_frame(
+                  render_output_sample_rate_hz, render_output_num_channels,
+                  &render_output_stream_config, &render_output_frame_channels,
+                  &render_output_frame);
+              populate_audio_frame(
+                  capture_input_sample_rate_hz, capture_input_num_channels,
+                  &capture_input_stream_config, &capture_input_frame_channels,
+                  &capture_input_frame);
+              populate_audio_frame(
+                  capture_output_sample_rate_hz, capture_output_num_channels,
+                  &capture_output_stream_config, &capture_output_frame_channels,
+                  &capture_output_frame);
+
+              for (size_t frame = 0; frame < 2; ++frame) {
+                SCOPED_TRACE(ProduceDebugText(
+                    render_input_sample_rate_hz, render_output_sample_rate_hz,
+                    capture_input_sample_rate_hz, capture_output_sample_rate_hz,
+                    render_input_num_channels, render_output_num_channels,
+                    capture_input_num_channels, capture_output_num_channels));
+
+                int result = apm->ProcessReverseStream(
+                    &render_input_frame[0], render_input_stream_config,
+                    render_output_stream_config, &render_output_frame[0]);
+                EXPECT_EQ(result, AudioProcessing::kNoError);
+                result = apm->ProcessStream(
+                    &capture_input_frame[0], capture_input_stream_config,
+                    capture_output_stream_config, &capture_output_frame[0]);
+                EXPECT_EQ(result, AudioProcessing::kNoError);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+constexpr void Toggle(bool& b) {
+  b ^= true;
+}
+
+}  // namespace
+
+TEST(RuntimeSettingTest, TestDefaultCtor) {
+  auto s = AudioProcessing::RuntimeSetting();
+  EXPECT_EQ(AudioProcessing::RuntimeSetting::Type::kNotSpecified, s.type());
+}
+
+TEST(RuntimeSettingTest, TestUsageWithSwapQueue) {
+  SwapQueue<AudioProcessing::RuntimeSetting> q(1);
+  auto s = AudioProcessing::RuntimeSetting();
+  ASSERT_TRUE(q.Insert(&s));
+  ASSERT_TRUE(q.Remove(&s));
+  EXPECT_EQ(AudioProcessing::RuntimeSetting::Type::kNotSpecified, s.type());
+}
+
+TEST(ApmConfiguration, EnablePostProcessing) {
+  // Verify that apm uses a capture post processing module if one is provided.
+  auto mock_post_processor_ptr =
+      new ::testing::NiceMock<test::MockCustomProcessing>();
+  auto mock_post_processor =
+      std::unique_ptr<CustomProcessing>(mock_post_processor_ptr);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetCapturePostProcessing(std::move(mock_post_processor))
+          .Create();
+
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+
+  EXPECT_CALL(*mock_post_processor_ptr, Process(::testing::_)).Times(1);
+  apm->ProcessStream(audio.data.data(),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     audio.data.data());
+}
+
+TEST(ApmConfiguration, EnablePreProcessing) {
+  // Verify that apm uses a render pre processing module if one is provided.
+  auto mock_pre_processor_ptr =
+      new ::testing::NiceMock<test::MockCustomProcessing>();
+  auto mock_pre_processor =
+      std::unique_ptr<CustomProcessing>(mock_pre_processor_ptr);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetRenderPreProcessing(std::move(mock_pre_processor))
+          .Create();
+
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+
+  EXPECT_CALL(*mock_pre_processor_ptr, Process(::testing::_)).Times(1);
+  apm->ProcessReverseStream(
+      audio.data.data(), StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      audio.data.data());
+}
+
+TEST(ApmConfiguration, EnableCaptureAnalyzer) {
+  // Verify that apm uses a capture analyzer if one is provided.
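+  // (NiceMock suppresses warnings about uninteresting mock calls, so only
+  // the Analyze() expectation below is enforced.)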
+  auto mock_capture_analyzer_ptr =
+      new ::testing::NiceMock<test::MockCustomAudioAnalyzer>();
+  auto mock_capture_analyzer =
+      std::unique_ptr<CustomAudioAnalyzer>(mock_capture_analyzer_ptr);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetCaptureAnalyzer(std::move(mock_capture_analyzer))
+          .Create();
+
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+
+  EXPECT_CALL(*mock_capture_analyzer_ptr, Analyze(::testing::_)).Times(1);
+  apm->ProcessStream(audio.data.data(),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     audio.data.data());
+}
+
+TEST(ApmConfiguration, PreProcessingReceivesRuntimeSettings) {
+  auto mock_pre_processor_ptr =
+      new ::testing::NiceMock<test::MockCustomProcessing>();
+  auto mock_pre_processor =
+      std::unique_ptr<CustomProcessing>(mock_pre_processor_ptr);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetRenderPreProcessing(std::move(mock_pre_processor))
+          .Create();
+  apm->SetRuntimeSetting(
+      AudioProcessing::RuntimeSetting::CreateCustomRenderSetting(0));
+
+  // RuntimeSettings are forwarded during 'Process*Stream' calls, so we have
+  // to make one such call.
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+
+  EXPECT_CALL(*mock_pre_processor_ptr, SetRuntimeSetting(::testing::_))
+      .Times(1);
+  apm->ProcessReverseStream(
+      audio.data.data(), StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      audio.data.data());
+}
+
+class MyEchoControlFactory : public EchoControlFactory {
+ public:
+  std::unique_ptr<EchoControl> Create(int sample_rate_hz) {
+    auto ec = new test::MockEchoControl();
+    EXPECT_CALL(*ec, AnalyzeRender(::testing::_)).Times(1);
+    EXPECT_CALL(*ec, AnalyzeCapture(::testing::_)).Times(2);
+    EXPECT_CALL(*ec, ProcessCapture(::testing::_, ::testing::_, ::testing::_))
+        .Times(2);
+    return std::unique_ptr<EchoControl>(ec);
+  }
+
+  std::unique_ptr<EchoControl> Create(int sample_rate_hz,
+                                      int num_render_channels,
+                                      int num_capture_channels) {
+    return Create(sample_rate_hz);
+  }
+};
+
+TEST(ApmConfiguration, EchoControlInjection) {
+  // Verify that apm uses an injected echo controller if one is provided.
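+  // (MyEchoControlFactory's expectations encode the call pattern below: one
+  // AnalyzeRender() for the single ProcessReverseStream() call, and one
+  // AnalyzeCapture()/ProcessCapture() pair for each of the two
+  // ProcessStream() calls.)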
+  std::unique_ptr<EchoControlFactory> echo_control_factory(
+      new MyEchoControlFactory());
+
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoControlFactory(std::move(echo_control_factory))
+          .Create();
+
+  Int16FrameData audio;
+  audio.num_channels = 1;
+  SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz);
+  apm->ProcessStream(audio.data.data(),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     audio.data.data());
+  apm->ProcessReverseStream(
+      audio.data.data(), StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      StreamConfig(audio.sample_rate_hz, audio.num_channels),
+      audio.data.data());
+  apm->ProcessStream(audio.data.data(),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     StreamConfig(audio.sample_rate_hz, audio.num_channels),
+                     audio.data.data());
+}
+
+TEST(ApmConfiguration, EchoDetectorInjection) {
+  using ::testing::_;
+  rtc::scoped_refptr<test::MockEchoDetector> mock_echo_detector =
+      rtc::make_ref_counted<::testing::StrictMock<test::MockEchoDetector>>();
+  EXPECT_CALL(*mock_echo_detector,
+              Initialize(/*capture_sample_rate_hz=*/16000, _,
+                         /*render_sample_rate_hz=*/16000, _))
+      .Times(1);
+  rtc::scoped_refptr<AudioProcessing> apm =
+      AudioProcessingBuilderForTesting()
+          .SetEchoDetector(mock_echo_detector)
+          .Create();
+
+  // The echo detector is included in processing when enabled.
+  EXPECT_CALL(*mock_echo_detector, AnalyzeRenderAudio(_))
+      .WillOnce([](rtc::ArrayView<const float> render_audio) {
+        EXPECT_EQ(render_audio.size(), 160u);
+      });
+  EXPECT_CALL(*mock_echo_detector, AnalyzeCaptureAudio(_))
+      .WillOnce([](rtc::ArrayView<const float> capture_audio) {
+        EXPECT_EQ(capture_audio.size(), 160u);
+      });
+  EXPECT_CALL(*mock_echo_detector, GetMetrics()).Times(1);
+
+  Int16FrameData frame;
+  frame.num_channels = 1;
+  SetFrameSampleRate(&frame, 16000);
+
+  apm->ProcessReverseStream(frame.data.data(), StreamConfig(16000, 1),
+                            StreamConfig(16000, 1), frame.data.data());
+  apm->ProcessStream(frame.data.data(), StreamConfig(16000, 1),
+                     StreamConfig(16000, 1), frame.data.data());
+
+  // When processing rates change, the echo detector is also reinitialized to
+  // match those.
+  EXPECT_CALL(*mock_echo_detector,
+              Initialize(/*capture_sample_rate_hz=*/48000, _,
+                         /*render_sample_rate_hz=*/16000, _))
+      .Times(1);
+  EXPECT_CALL(*mock_echo_detector,
+              Initialize(/*capture_sample_rate_hz=*/48000, _,
+                         /*render_sample_rate_hz=*/48000, _))
+      .Times(1);
+  EXPECT_CALL(*mock_echo_detector, AnalyzeRenderAudio(_))
+      .WillOnce([](rtc::ArrayView<const float> render_audio) {
+        EXPECT_EQ(render_audio.size(), 480u);
+      });
+  EXPECT_CALL(*mock_echo_detector, AnalyzeCaptureAudio(_))
+      .Times(2)
+      .WillRepeatedly([](rtc::ArrayView<const float> capture_audio) {
+        EXPECT_EQ(capture_audio.size(), 480u);
+      });
+  EXPECT_CALL(*mock_echo_detector, GetMetrics()).Times(2);
+
+  SetFrameSampleRate(&frame, 48000);
+  apm->ProcessStream(frame.data.data(), StreamConfig(48000, 1),
+                     StreamConfig(48000, 1), frame.data.data());
+  apm->ProcessReverseStream(frame.data.data(), StreamConfig(48000, 1),
+                            StreamConfig(48000, 1), frame.data.data());
+  apm->ProcessStream(frame.data.data(), StreamConfig(48000, 1),
+                     StreamConfig(48000, 1), frame.data.data());
+}
+
+rtc::scoped_refptr<AudioProcessing> CreateApm(bool mobile_aec) {
+  // Enable residual echo detection, for stats.
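+  // (CreateApm(false) exercises the full AEC3 path; CreateApm(true) selects
+  // the mobile-mode echo canceller, AECM.)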
+ rtc::scoped_refptr apm = + AudioProcessingBuilderForTesting() + .SetEchoDetector(CreateEchoDetector()) + .Create(); + if (!apm) { + return apm; + } + + ProcessingConfig processing_config = { + {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}}; + + if (apm->Initialize(processing_config) != 0) { + return nullptr; + } + + // Disable all components except for an AEC. + AudioProcessing::Config apm_config; + apm_config.high_pass_filter.enabled = false; + apm_config.gain_controller1.enabled = false; + apm_config.gain_controller2.enabled = false; + apm_config.echo_canceller.enabled = true; + apm_config.echo_canceller.mobile_mode = mobile_aec; + apm_config.noise_suppression.enabled = false; + apm->ApplyConfig(apm_config); + return apm; +} + +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) || defined(WEBRTC_MAC) +#define MAYBE_ApmStatistics DISABLED_ApmStatistics +#else +#define MAYBE_ApmStatistics ApmStatistics +#endif + +TEST(MAYBE_ApmStatistics, AECEnabledTest) { + // Set up APM with AEC3 and process some audio. + rtc::scoped_refptr apm = CreateApm(false); + ASSERT_TRUE(apm); + AudioProcessing::Config apm_config; + apm_config.echo_canceller.enabled = true; + apm->ApplyConfig(apm_config); + + // Set up an audioframe. + Int16FrameData frame; + frame.num_channels = 1; + SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); + + // Fill the audio frame with a sawtooth pattern. + int16_t* ptr = frame.data.data(); + for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) { + ptr[i] = 10000 * ((i % 3) - 1); + } + + // Do some processing. + for (int i = 0; i < 200; i++) { + EXPECT_EQ(apm->ProcessReverseStream( + frame.data.data(), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + frame.data.data()), + 0); + EXPECT_EQ(apm->set_stream_delay_ms(0), 0); + EXPECT_EQ(apm->ProcessStream( + frame.data.data(), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + frame.data.data()), + 0); + } + + // Test statistics interface. + AudioProcessingStats stats = apm->GetStatistics(); + // We expect all statistics to be set and have a sensible value. + ASSERT_TRUE(stats.residual_echo_likelihood.has_value()); + EXPECT_GE(*stats.residual_echo_likelihood, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood, 1.0); + ASSERT_TRUE(stats.residual_echo_likelihood_recent_max.has_value()); + EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0); + ASSERT_TRUE(stats.echo_return_loss.has_value()); + EXPECT_NE(*stats.echo_return_loss, -100.0); + ASSERT_TRUE(stats.echo_return_loss_enhancement.has_value()); + EXPECT_NE(*stats.echo_return_loss_enhancement, -100.0); +} + +TEST(MAYBE_ApmStatistics, AECMEnabledTest) { + // Set up APM with AECM and process some audio. + rtc::scoped_refptr apm = CreateApm(true); + ASSERT_TRUE(apm); + + // Set up an audioframe. + Int16FrameData frame; + frame.num_channels = 1; + SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); + + // Fill the audio frame with a sawtooth pattern. + int16_t* ptr = frame.data.data(); + for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) { + ptr[i] = 10000 * ((i % 3) - 1); + } + + // Do some processing. 
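+  // Process 200 matched render/capture frame pairs (roughly two seconds of
+  // audio at 10 ms per frame) so that the echo control has time to converge
+  // and the reported statistics become meaningful.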
+ for (int i = 0; i < 200; i++) { + EXPECT_EQ(apm->ProcessReverseStream( + frame.data.data(), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + frame.data.data()), + 0); + EXPECT_EQ(apm->set_stream_delay_ms(0), 0); + EXPECT_EQ(apm->ProcessStream( + frame.data.data(), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + frame.data.data()), + 0); + } + + // Test statistics interface. + AudioProcessingStats stats = apm->GetStatistics(); + // We expect only the residual echo detector statistics to be set and have a + // sensible value. + ASSERT_TRUE(stats.residual_echo_likelihood.has_value()); + EXPECT_GE(*stats.residual_echo_likelihood, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood, 1.0); + ASSERT_TRUE(stats.residual_echo_likelihood_recent_max.has_value()); + EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0); + EXPECT_FALSE(stats.echo_return_loss.has_value()); + EXPECT_FALSE(stats.echo_return_loss_enhancement.has_value()); +} + +TEST(ApmStatistics, DoNotReportVoiceDetectedStat) { + ProcessingConfig processing_config = { + {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}}; + + // Set up an audioframe. + Int16FrameData frame; + frame.num_channels = 1; + SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); + + // Fill the audio frame with a sawtooth pattern. + int16_t* ptr = frame.data.data(); + for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) { + ptr[i] = 10000 * ((i % 3) - 1); + } + + rtc::scoped_refptr apm = + AudioProcessingBuilderForTesting().Create(); + apm->Initialize(processing_config); + + // No metric should be reported. + EXPECT_EQ( + apm->ProcessStream(frame.data.data(), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + frame.data.data()), + 0); + EXPECT_FALSE(apm->GetStatistics().voice_detected.has_value()); +} + +TEST(ApmStatistics, GetStatisticsReportsNoEchoDetectorStatsWhenDisabled) { + rtc::scoped_refptr apm = + AudioProcessingBuilderForTesting().Create(); + Int16FrameData frame; + frame.num_channels = 1; + SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); + ASSERT_EQ( + apm->ProcessStream(frame.data.data(), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + frame.data.data()), + 0); + // Echo detector is disabled by default, no stats reported. + AudioProcessingStats stats = apm->GetStatistics(); + EXPECT_FALSE(stats.residual_echo_likelihood.has_value()); + EXPECT_FALSE(stats.residual_echo_likelihood_recent_max.has_value()); +} + +TEST(ApmStatistics, GetStatisticsReportsEchoDetectorStatsWhenEnabled) { + // Create APM with an echo detector injected. + rtc::scoped_refptr apm = + AudioProcessingBuilderForTesting() + .SetEchoDetector(CreateEchoDetector()) + .Create(); + Int16FrameData frame; + frame.num_channels = 1; + SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); + // Echo detector enabled: Report stats. 
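+  // A single processed capture frame is sufficient for the detector to
+  // produce likelihood estimates.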
+  ASSERT_EQ(
+      apm->ProcessStream(frame.data.data(),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         frame.data.data()),
+      0);
+  AudioProcessingStats stats = apm->GetStatistics();
+  EXPECT_TRUE(stats.residual_echo_likelihood.has_value());
+  EXPECT_TRUE(stats.residual_echo_likelihood_recent_max.has_value());
+}
+
+TEST(ApmConfiguration, HandlingOfRateAndChannelCombinations) {
+  std::array<int, 3> sample_rates_hz = {16000, 32000, 48000};
+  std::array<int, 2> render_channel_counts = {1, 7};
+  std::array<int, 2> capture_channel_counts = {1, 7};
+  RunApmRateAndChannelTest(sample_rates_hz, render_channel_counts,
+                           capture_channel_counts);
+}
+
+TEST(ApmConfiguration, HandlingOfChannelCombinations) {
+  std::array<int, 1> sample_rates_hz = {48000};
+  std::array<int, 8> render_channel_counts = {1, 2, 3, 4, 5, 6, 7, 8};
+  std::array<int, 8> capture_channel_counts = {1, 2, 3, 4, 5, 6, 7, 8};
+  RunApmRateAndChannelTest(sample_rates_hz, render_channel_counts,
+                           capture_channel_counts);
+}
+
+TEST(ApmConfiguration, HandlingOfRateCombinations) {
+  // Test rates <= 96000 logged by Chrome UMA:
+  //  - WebRTC.AudioInputSampleRate
+  //  - WebRTC.AudioOutputSampleRate
+  // Higher rates are tested in AudioProcessingTest.Format, to keep the number
+  // of combinations in this test manageable.
+  std::array<int, 9> sample_rates_hz = {8000,  11025, 16000, 22050, 32000,
+                                        44100, 48000, 88200, 96000};
+  std::array<int, 1> render_channel_counts = {2};
+  std::array<int, 1> capture_channel_counts = {2};
+  RunApmRateAndChannelTest(sample_rates_hz, render_channel_counts,
+                           capture_channel_counts);
+}
+
+TEST(ApmConfiguration, SelfAssignment) {
+  // At some point memory sanitizer was complaining about self-assignment.
+  // Make sure we don't regress.
+  AudioProcessing::Config config;
+  AudioProcessing::Config* config2 = &config;
+  *config2 = *config2;  // Workaround -Wself-assign-overloaded
+  SUCCEED();  // Real success is absence of defects from asan/msan/ubsan.
+}
+
+TEST(AudioProcessing, GainController1ConfigEqual) {
+  AudioProcessing::Config::GainController1 a;
+  AudioProcessing::Config::GainController1 b;
+  EXPECT_EQ(a, b);
+
+  Toggle(a.enabled);
+  b.enabled = a.enabled;
+  EXPECT_EQ(a, b);
+
+  a.mode = AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital;
+  b.mode = a.mode;
+  EXPECT_EQ(a, b);
+
+  a.target_level_dbfs++;
+  b.target_level_dbfs = a.target_level_dbfs;
+  EXPECT_EQ(a, b);
+
+  a.compression_gain_db++;
+  b.compression_gain_db = a.compression_gain_db;
+  EXPECT_EQ(a, b);
+
+  Toggle(a.enable_limiter);
+  b.enable_limiter = a.enable_limiter;
+  EXPECT_EQ(a, b);
+
+  auto& a_analog = a.analog_gain_controller;
+  auto& b_analog = b.analog_gain_controller;
+
+  Toggle(a_analog.enabled);
+  b_analog.enabled = a_analog.enabled;
+  EXPECT_EQ(a, b);
+
+  a_analog.startup_min_volume++;
+  b_analog.startup_min_volume = a_analog.startup_min_volume;
+  EXPECT_EQ(a, b);
+
+  a_analog.clipped_level_min++;
+  b_analog.clipped_level_min = a_analog.clipped_level_min;
+  EXPECT_EQ(a, b);
+
+  Toggle(a_analog.enable_digital_adaptive);
+  b_analog.enable_digital_adaptive = a_analog.enable_digital_adaptive;
+  EXPECT_EQ(a, b);
+}
+
+// Checks that one differing parameter is sufficient to make two configs
+// different.
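+// The pattern below mutates one field of `a` at a time, checks operator!=,
+// and restores equality via assignment before moving to the next field.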
+TEST(AudioProcessing, GainController1ConfigNotEqual) { + AudioProcessing::Config::GainController1 a; + const AudioProcessing::Config::GainController1 b; + + Toggle(a.enabled); + EXPECT_NE(a, b); + a = b; + + a.mode = AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital; + EXPECT_NE(a, b); + a = b; + + a.target_level_dbfs++; + EXPECT_NE(a, b); + a = b; + + a.compression_gain_db++; + EXPECT_NE(a, b); + a = b; + + Toggle(a.enable_limiter); + EXPECT_NE(a, b); + a = b; + + auto& a_analog = a.analog_gain_controller; + const auto& b_analog = b.analog_gain_controller; + + Toggle(a_analog.enabled); + EXPECT_NE(a, b); + a_analog = b_analog; + + a_analog.startup_min_volume++; + EXPECT_NE(a, b); + a_analog = b_analog; + + a_analog.clipped_level_min++; + EXPECT_NE(a, b); + a_analog = b_analog; + + Toggle(a_analog.enable_digital_adaptive); + EXPECT_NE(a, b); + a_analog = b_analog; +} + +TEST(AudioProcessing, GainController2ConfigEqual) { + AudioProcessing::Config::GainController2 a; + AudioProcessing::Config::GainController2 b; + EXPECT_EQ(a, b); + + Toggle(a.enabled); + b.enabled = a.enabled; + EXPECT_EQ(a, b); + + a.fixed_digital.gain_db += 1.0f; + b.fixed_digital.gain_db = a.fixed_digital.gain_db; + EXPECT_EQ(a, b); + + auto& a_adaptive = a.adaptive_digital; + auto& b_adaptive = b.adaptive_digital; + + Toggle(a_adaptive.enabled); + b_adaptive.enabled = a_adaptive.enabled; + EXPECT_EQ(a, b); + + a_adaptive.headroom_db += 1.0f; + b_adaptive.headroom_db = a_adaptive.headroom_db; + EXPECT_EQ(a, b); + + a_adaptive.max_gain_db += 1.0f; + b_adaptive.max_gain_db = a_adaptive.max_gain_db; + EXPECT_EQ(a, b); + + a_adaptive.initial_gain_db += 1.0f; + b_adaptive.initial_gain_db = a_adaptive.initial_gain_db; + EXPECT_EQ(a, b); + + a_adaptive.max_gain_change_db_per_second += 1.0f; + b_adaptive.max_gain_change_db_per_second = + a_adaptive.max_gain_change_db_per_second; + EXPECT_EQ(a, b); + + a_adaptive.max_output_noise_level_dbfs += 1.0f; + b_adaptive.max_output_noise_level_dbfs = + a_adaptive.max_output_noise_level_dbfs; + EXPECT_EQ(a, b); +} + +// Checks that one differing parameter is sufficient to make two configs +// different. 
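+// Same field-by-field pattern as above, applied to the GainController2 config
+// and its adaptive digital sub-config.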
+TEST(AudioProcessing, GainController2ConfigNotEqual) { + AudioProcessing::Config::GainController2 a; + const AudioProcessing::Config::GainController2 b; + + Toggle(a.enabled); + EXPECT_NE(a, b); + a = b; + + a.fixed_digital.gain_db += 1.0f; + EXPECT_NE(a, b); + a.fixed_digital = b.fixed_digital; + + auto& a_adaptive = a.adaptive_digital; + const auto& b_adaptive = b.adaptive_digital; + + Toggle(a_adaptive.enabled); + EXPECT_NE(a, b); + a_adaptive = b_adaptive; + + a_adaptive.headroom_db += 1.0f; + EXPECT_NE(a, b); + a_adaptive = b_adaptive; + + a_adaptive.max_gain_db += 1.0f; + EXPECT_NE(a, b); + a_adaptive = b_adaptive; + + a_adaptive.initial_gain_db += 1.0f; + EXPECT_NE(a, b); + a_adaptive = b_adaptive; + + a_adaptive.max_gain_change_db_per_second += 1.0f; + EXPECT_NE(a, b); + a_adaptive = b_adaptive; + + a_adaptive.max_output_noise_level_dbfs += 1.0f; + EXPECT_NE(a, b); + a_adaptive = b_adaptive; +} + +struct ApmFormatHandlingTestParams { + enum class ExpectedOutput { + kErrorAndUnmodified, + kErrorAndSilence, + kErrorAndCopyOfFirstChannel, + kErrorAndExactCopy, + kNoError + }; + + StreamConfig input_config; + StreamConfig output_config; + ExpectedOutput expected_output; +}; + +class ApmFormatHandlingTest + : public ::testing::TestWithParam< + std::tuple> { + public: + ApmFormatHandlingTest() + : stream_direction_(std::get<0>(GetParam())), + test_params_(std::get<1>(GetParam())) {} + + protected: + ::testing::Message ProduceDebugMessage() { + return ::testing::Message() + << "input sample_rate_hz=" + << test_params_.input_config.sample_rate_hz() + << " num_channels=" << test_params_.input_config.num_channels() + << ", output sample_rate_hz=" + << test_params_.output_config.sample_rate_hz() + << " num_channels=" << test_params_.output_config.num_channels() + << ", stream_direction=" << stream_direction_ << ", expected_output=" + << static_cast(test_params_.expected_output); + } + + StreamDirection stream_direction_; + ApmFormatHandlingTestParams test_params_; +}; + +INSTANTIATE_TEST_SUITE_P( + FormatValidation, + ApmFormatHandlingTest, + testing::Combine( + ::testing::Values(kForward, kReverse), + ::testing::Values( + // Test cases with values on the boundary of legal ranges. + ApmFormatHandlingTestParams{ + StreamConfig(16000, 1), StreamConfig(8000, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kNoError}, + ApmFormatHandlingTestParams{ + StreamConfig(8000, 1), StreamConfig(16000, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kNoError}, + ApmFormatHandlingTestParams{ + StreamConfig(384000, 1), StreamConfig(16000, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kNoError}, + ApmFormatHandlingTestParams{ + StreamConfig(16000, 1), StreamConfig(384000, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kNoError}, + ApmFormatHandlingTestParams{ + StreamConfig(16000, 2), StreamConfig(16000, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kNoError}, + ApmFormatHandlingTestParams{ + StreamConfig(16000, 3), StreamConfig(16000, 3), + ApmFormatHandlingTestParams::ExpectedOutput::kNoError}, + + // Supported but incompatible formats. + ApmFormatHandlingTestParams{ + StreamConfig(16000, 3), StreamConfig(16000, 2), + ApmFormatHandlingTestParams::ExpectedOutput:: + kErrorAndCopyOfFirstChannel}, + ApmFormatHandlingTestParams{ + StreamConfig(16000, 3), StreamConfig(16000, 4), + ApmFormatHandlingTestParams::ExpectedOutput:: + kErrorAndCopyOfFirstChannel}, + + // Unsupported format and input / output mismatch. 
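+          // 7900 Hz is below the 8000 Hz minimum and 390000 Hz is above the
+          // 384000 Hz maximum exercised as kNoError in the boundary cases
+          // above, so these combinations must fail.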
+ ApmFormatHandlingTestParams{ + StreamConfig(7900, 1), StreamConfig(16000, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence}, + ApmFormatHandlingTestParams{ + StreamConfig(16000, 1), StreamConfig(7900, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence}, + ApmFormatHandlingTestParams{ + StreamConfig(390000, 1), StreamConfig(16000, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence}, + ApmFormatHandlingTestParams{ + StreamConfig(16000, 1), StreamConfig(390000, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence}, + ApmFormatHandlingTestParams{ + StreamConfig(-16000, 1), StreamConfig(16000, 1), + ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence}, + + // Unsupported format but input / output formats match. + ApmFormatHandlingTestParams{StreamConfig(7900, 1), + StreamConfig(7900, 1), + ApmFormatHandlingTestParams:: + ExpectedOutput::kErrorAndExactCopy}, + ApmFormatHandlingTestParams{StreamConfig(390000, 1), + StreamConfig(390000, 1), + ApmFormatHandlingTestParams:: + ExpectedOutput::kErrorAndExactCopy}, + + // Unsupported but identical sample rate, channel mismatch. + ApmFormatHandlingTestParams{ + StreamConfig(7900, 1), StreamConfig(7900, 2), + ApmFormatHandlingTestParams::ExpectedOutput:: + kErrorAndCopyOfFirstChannel}, + ApmFormatHandlingTestParams{ + StreamConfig(7900, 2), StreamConfig(7900, 1), + ApmFormatHandlingTestParams::ExpectedOutput:: + kErrorAndCopyOfFirstChannel}, + + // Test cases with meaningless output format. + ApmFormatHandlingTestParams{ + StreamConfig(16000, 1), StreamConfig(-16000, 1), + ApmFormatHandlingTestParams::ExpectedOutput:: + kErrorAndUnmodified}, + ApmFormatHandlingTestParams{ + StreamConfig(-16000, 1), StreamConfig(-16000, 1), + ApmFormatHandlingTestParams::ExpectedOutput:: + kErrorAndUnmodified}))); + +TEST_P(ApmFormatHandlingTest, IntApi) { + SCOPED_TRACE(ProduceDebugMessage()); + + // Set up input and output data. + const size_t num_input_samples = + test_params_.input_config.num_channels() * + std::abs(test_params_.input_config.sample_rate_hz() / 100); + const size_t num_output_samples = + test_params_.output_config.num_channels() * + std::abs(test_params_.output_config.sample_rate_hz() / 100); + std::vector input_block(num_input_samples); + for (int i = 0; i < static_cast(input_block.size()); ++i) { + input_block[i] = i; + } + std::vector output_block(num_output_samples); + constexpr int kUnlikelyOffset = 37; + for (int i = 0; i < static_cast(output_block.size()); ++i) { + output_block[i] = i - kUnlikelyOffset; + } + + // Call APM. + rtc::scoped_refptr ap = + AudioProcessingBuilderForTesting().Create(); + int error; + if (stream_direction_ == kForward) { + error = ap->ProcessStream(input_block.data(), test_params_.input_config, + test_params_.output_config, output_block.data()); + } else { + error = ap->ProcessReverseStream( + input_block.data(), test_params_.input_config, + test_params_.output_config, output_block.data()); + } + + // Check output. 
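+  // The error code plus the resulting contents of output_block jointly
+  // discriminate the five expected outcomes enumerated in
+  // ApmFormatHandlingTestParams::ExpectedOutput.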
+ switch (test_params_.expected_output) { + case ApmFormatHandlingTestParams::ExpectedOutput::kNoError: + EXPECT_EQ(error, AudioProcessing::kNoError); + break; + case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndUnmodified: + EXPECT_NE(error, AudioProcessing::kNoError); + for (int i = 0; i < static_cast(output_block.size()); ++i) { + EXPECT_EQ(output_block[i], i - kUnlikelyOffset); + } + break; + case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence: + EXPECT_NE(error, AudioProcessing::kNoError); + for (int i = 0; i < static_cast(output_block.size()); ++i) { + EXPECT_EQ(output_block[i], 0); + } + break; + case ApmFormatHandlingTestParams::ExpectedOutput:: + kErrorAndCopyOfFirstChannel: + EXPECT_NE(error, AudioProcessing::kNoError); + for (size_t ch = 0; ch < test_params_.output_config.num_channels(); + ++ch) { + for (size_t i = 0; i < test_params_.output_config.num_frames(); ++i) { + EXPECT_EQ( + output_block[ch + i * test_params_.output_config.num_channels()], + static_cast(i * + test_params_.input_config.num_channels())); + } + } + break; + case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndExactCopy: + EXPECT_NE(error, AudioProcessing::kNoError); + for (int i = 0; i < static_cast(output_block.size()); ++i) { + EXPECT_EQ(output_block[i], i); + } + break; + } +} + +TEST_P(ApmFormatHandlingTest, FloatApi) { + SCOPED_TRACE(ProduceDebugMessage()); + + // Set up input and output data. + const size_t input_samples_per_channel = + std::abs(test_params_.input_config.sample_rate_hz()) / 100; + const size_t output_samples_per_channel = + std::abs(test_params_.output_config.sample_rate_hz()) / 100; + const size_t input_num_channels = test_params_.input_config.num_channels(); + const size_t output_num_channels = test_params_.output_config.num_channels(); + ChannelBuffer input_block(input_samples_per_channel, + input_num_channels); + ChannelBuffer output_block(output_samples_per_channel, + output_num_channels); + for (size_t ch = 0; ch < input_num_channels; ++ch) { + for (size_t i = 0; i < input_samples_per_channel; ++i) { + input_block.channels()[ch][i] = ch + i * input_num_channels; + } + } + constexpr int kUnlikelyOffset = 37; + for (size_t ch = 0; ch < output_num_channels; ++ch) { + for (size_t i = 0; i < output_samples_per_channel; ++i) { + output_block.channels()[ch][i] = + ch + i * output_num_channels - kUnlikelyOffset; + } + } + + // Call APM. + rtc::scoped_refptr ap = + AudioProcessingBuilderForTesting().Create(); + int error; + if (stream_direction_ == kForward) { + error = + ap->ProcessStream(input_block.channels(), test_params_.input_config, + test_params_.output_config, output_block.channels()); + } else { + error = ap->ProcessReverseStream( + input_block.channels(), test_params_.input_config, + test_params_.output_config, output_block.channels()); + } + + // Check output. 
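+  // Same outcome matrix as the int16 API above, but verified per channel on
+  // the deinterleaved float buffers.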
+ switch (test_params_.expected_output) { + case ApmFormatHandlingTestParams::ExpectedOutput::kNoError: + EXPECT_EQ(error, AudioProcessing::kNoError); + break; + case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndUnmodified: + EXPECT_NE(error, AudioProcessing::kNoError); + for (size_t ch = 0; ch < output_num_channels; ++ch) { + for (size_t i = 0; i < output_samples_per_channel; ++i) { + EXPECT_EQ(output_block.channels()[ch][i], + ch + i * output_num_channels - kUnlikelyOffset); + } + } + break; + case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence: + EXPECT_NE(error, AudioProcessing::kNoError); + for (size_t ch = 0; ch < output_num_channels; ++ch) { + for (size_t i = 0; i < output_samples_per_channel; ++i) { + EXPECT_EQ(output_block.channels()[ch][i], 0); + } + } + break; + case ApmFormatHandlingTestParams::ExpectedOutput:: + kErrorAndCopyOfFirstChannel: + EXPECT_NE(error, AudioProcessing::kNoError); + for (size_t ch = 0; ch < output_num_channels; ++ch) { + for (size_t i = 0; i < output_samples_per_channel; ++i) { + EXPECT_EQ(output_block.channels()[ch][i], + input_block.channels()[0][i]); + } + } + break; + case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndExactCopy: + EXPECT_NE(error, AudioProcessing::kNoError); + for (size_t ch = 0; ch < output_num_channels; ++ch) { + for (size_t i = 0; i < output_samples_per_channel; ++i) { + EXPECT_EQ(output_block.channels()[ch][i], + input_block.channels()[ch][i]); + } + } + break; + } +} + +TEST(ApmAnalyzeReverseStreamFormatTest, AnalyzeReverseStream) { + for (auto&& [input_config, expect_error] : + {std::tuple(StreamConfig(16000, 2), /*expect_error=*/false), + std::tuple(StreamConfig(8000, 1), /*expect_error=*/false), + std::tuple(StreamConfig(384000, 1), /*expect_error=*/false), + std::tuple(StreamConfig(7900, 1), /*expect_error=*/true), + std::tuple(StreamConfig(390000, 1), /*expect_error=*/true), + std::tuple(StreamConfig(16000, 0), /*expect_error=*/true), + std::tuple(StreamConfig(-16000, 0), /*expect_error=*/true)}) { + SCOPED_TRACE(::testing::Message() + << "sample_rate_hz=" << input_config.sample_rate_hz() + << " num_channels=" << input_config.num_channels()); + + // Set up input data. + ChannelBuffer input_block( + std::abs(input_config.sample_rate_hz()) / 100, + input_config.num_channels()); + + // Call APM. + rtc::scoped_refptr ap = + AudioProcessingBuilderForTesting().Create(); + int error = ap->AnalyzeReverseStream(input_block.channels(), input_config); + + // Check output. + if (expect_error) { + EXPECT_NE(error, AudioProcessing::kNoError); + } else { + EXPECT_EQ(error, AudioProcessing::kNoError); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/BUILD.gn new file mode 100644 index 0000000000..e7ff8482f6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/BUILD.gn @@ -0,0 +1,45 @@ +# Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
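+
+# The targets below build the capture levels adjuster library and its unit
+# tests.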
+ +import("../../../webrtc.gni") + +rtc_library("capture_levels_adjuster") { + visibility = [ "*" ] + + sources = [ + "audio_samples_scaler.cc", + "audio_samples_scaler.h", + "capture_levels_adjuster.cc", + "capture_levels_adjuster.h", + ] + + defines = [] + + deps = [ + "..:audio_buffer", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:safe_minmax", + ] +} + +rtc_library("capture_levels_adjuster_unittests") { + testonly = true + + sources = [ + "audio_samples_scaler_unittest.cc", + "capture_levels_adjuster_unittest.cc", + ] + deps = [ + ":capture_levels_adjuster", + "..:audioproc_test_utils", + "../../../rtc_base:gunit_helpers", + "../../../rtc_base:stringutils", + "../../../test:test_support", + ] +} diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc new file mode 100644 index 0000000000..cb2336b87d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h" + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +AudioSamplesScaler::AudioSamplesScaler(float initial_gain) + : previous_gain_(initial_gain), target_gain_(initial_gain) {} + +void AudioSamplesScaler::Process(AudioBuffer& audio_buffer) { + if (static_cast(audio_buffer.num_frames()) != samples_per_channel_) { + // Update the members depending on audio-buffer length if needed. + RTC_DCHECK_GT(audio_buffer.num_frames(), 0); + samples_per_channel_ = static_cast(audio_buffer.num_frames()); + one_by_samples_per_channel_ = 1.f / samples_per_channel_; + } + + if (target_gain_ == 1.f && previous_gain_ == target_gain_) { + // If only a gain of 1 is to be applied, do an early return without applying + // any gain. + return; + } + + float gain = previous_gain_; + if (previous_gain_ == target_gain_) { + // Apply a non-changing gain. + for (size_t channel = 0; channel < audio_buffer.num_channels(); ++channel) { + rtc::ArrayView channel_view(audio_buffer.channels()[channel], + samples_per_channel_); + for (float& sample : channel_view) { + sample *= gain; + } + } + } else { + const float increment = + (target_gain_ - previous_gain_) * one_by_samples_per_channel_; + + if (increment > 0.f) { + // Apply an increasing gain. + for (size_t channel = 0; channel < audio_buffer.num_channels(); + ++channel) { + gain = previous_gain_; + rtc::ArrayView channel_view(audio_buffer.channels()[channel], + samples_per_channel_); + for (float& sample : channel_view) { + gain = std::min(gain + increment, target_gain_); + sample *= gain; + } + } + } else { + // Apply a decreasing gain. 
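+      // Mirror image of the increasing case: the per-sample gain moves by the
+      // (negative) increment and is floored at target_gain_ so the ramp never
+      // overshoots.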
+ for (size_t channel = 0; channel < audio_buffer.num_channels(); + ++channel) { + gain = previous_gain_; + rtc::ArrayView channel_view(audio_buffer.channels()[channel], + samples_per_channel_); + for (float& sample : channel_view) { + gain = std::max(gain + increment, target_gain_); + sample *= gain; + } + } + } + } + previous_gain_ = target_gain_; + + // Saturate the samples to be in the S16 range. + for (size_t channel = 0; channel < audio_buffer.num_channels(); ++channel) { + rtc::ArrayView channel_view(audio_buffer.channels()[channel], + samples_per_channel_); + for (float& sample : channel_view) { + constexpr float kMinFloatS16Value = -32768.f; + constexpr float kMaxFloatS16Value = 32767.f; + sample = rtc::SafeClamp(sample, kMinFloatS16Value, kMaxFloatS16Value); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h new file mode 100644 index 0000000000..2ae8533940 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_ +#define MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_ + +#include + +#include "modules/audio_processing/audio_buffer.h" + +namespace webrtc { + +// Handles and applies a gain to the samples in an audio buffer. +// The gain is applied for each sample and any changes in the gain take effect +// gradually (in a linear manner) over one frame. +class AudioSamplesScaler { + public: + // C-tor. The supplied `initial_gain` is used immediately at the first call to + // Process(), i.e., in contrast to the gain supplied by SetGain(...) there is + // no gradual change to the `initial_gain`. + explicit AudioSamplesScaler(float initial_gain); + AudioSamplesScaler(const AudioSamplesScaler&) = delete; + AudioSamplesScaler& operator=(const AudioSamplesScaler&) = delete; + + // Applies the specified gain to the audio in `audio_buffer`. + void Process(AudioBuffer& audio_buffer); + + // Sets the gain to apply to each sample. + void SetGain(float gain) { target_gain_ = gain; } + + private: + float previous_gain_ = 1.f; + float target_gain_ = 1.f; + int samples_per_channel_ = -1; + float one_by_samples_per_channel_ = -1.f; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_AUDIO_SAMPLES_SCALER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler_unittest.cc b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler_unittest.cc new file mode 100644 index 0000000000..6e5fc2cbe3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler_unittest.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h"
+
+#include <tuple>
+
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+float SampleValueForChannel(int channel) {
+  constexpr float kSampleBaseValue = 100.f;
+  constexpr float kSampleChannelOffset = 1.f;
+  return kSampleBaseValue + channel * kSampleChannelOffset;
+}
+
+void PopulateBuffer(AudioBuffer& audio_buffer) {
+  for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+    test::FillBufferChannel(SampleValueForChannel(ch), ch, audio_buffer);
+  }
+}
+
+constexpr int kNumFramesToProcess = 10;
+
+class AudioSamplesScalerTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<std::tuple<int, int, float>> {
+ protected:
+  int sample_rate_hz() const { return std::get<0>(GetParam()); }
+  int num_channels() const { return std::get<1>(GetParam()); }
+  float initial_gain() const { return std::get<2>(GetParam()); }
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    AudioSamplesScalerTestSuite,
+    AudioSamplesScalerTest,
+    ::testing::Combine(::testing::Values(16000, 32000, 48000),
+                       ::testing::Values(1, 2, 4),
+                       ::testing::Values(0.1f, 1.f, 2.f, 4.f)));
+
+TEST_P(AudioSamplesScalerTest, InitialGainIsRespected) {
+  AudioSamplesScaler scaler(initial_gain());
+
+  AudioBuffer audio_buffer(sample_rate_hz(), num_channels(), sample_rate_hz(),
+                           num_channels(), sample_rate_hz(), num_channels());
+
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    PopulateBuffer(audio_buffer);
+    scaler.Process(audio_buffer);
+    for (int ch = 0; ch < num_channels(); ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        initial_gain() * SampleValueForChannel(ch));
+      }
+    }
+  }
+}
+
+TEST_P(AudioSamplesScalerTest, VerifyGainAdjustment) {
+  const float higher_gain = initial_gain();
+  const float lower_gain = higher_gain / 2.f;
+
+  AudioSamplesScaler scaler(lower_gain);
+
+  AudioBuffer audio_buffer(sample_rate_hz(), num_channels(), sample_rate_hz(),
+                           num_channels(), sample_rate_hz(), num_channels());
+
+  // Allow the initial, lower, gain to take effect.
+  PopulateBuffer(audio_buffer);
+
+  scaler.Process(audio_buffer);
+
+  // Set the new, higher, gain.
+  scaler.SetGain(higher_gain);
+
+  // Ensure that the new, higher, gain is achieved gradually over one frame.
+  PopulateBuffer(audio_buffer);
+
+  scaler.Process(audio_buffer);
+  for (int ch = 0; ch < num_channels(); ++ch) {
+    for (size_t i = 0; i < audio_buffer.num_frames() - 1; ++i) {
+      EXPECT_LT(audio_buffer.channels_const()[ch][i],
+                higher_gain * SampleValueForChannel(ch));
+      EXPECT_LE(audio_buffer.channels_const()[ch][i],
+                audio_buffer.channels_const()[ch][i + 1]);
+    }
+    EXPECT_LE(audio_buffer.channels_const()[ch][audio_buffer.num_frames() - 1],
+              higher_gain * SampleValueForChannel(ch));
+  }
+
+  // Ensure that the new, higher, gain is achieved and stays unchanged.
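+  // From the second frame after SetGain() onwards the ramp is complete, so
+  // every sample should carry exactly the new gain.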
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    PopulateBuffer(audio_buffer);
+    scaler.Process(audio_buffer);
+
+    for (int ch = 0; ch < num_channels(); ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        higher_gain * SampleValueForChannel(ch));
+      }
+    }
+  }
+
+  // Set the new, lower, gain.
+  scaler.SetGain(lower_gain);
+
+  // Ensure that the new, lower, gain is achieved gradually over one frame.
+  PopulateBuffer(audio_buffer);
+  scaler.Process(audio_buffer);
+  for (int ch = 0; ch < num_channels(); ++ch) {
+    for (size_t i = 0; i < audio_buffer.num_frames() - 1; ++i) {
+      EXPECT_GT(audio_buffer.channels_const()[ch][i],
+                lower_gain * SampleValueForChannel(ch));
+      EXPECT_GE(audio_buffer.channels_const()[ch][i],
+                audio_buffer.channels_const()[ch][i + 1]);
+    }
+    EXPECT_GE(audio_buffer.channels_const()[ch][audio_buffer.num_frames() - 1],
+              lower_gain * SampleValueForChannel(ch));
+  }
+
+  // Ensure that the new, lower, gain is achieved and stays unchanged.
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    PopulateBuffer(audio_buffer);
+    scaler.Process(audio_buffer);
+
+    for (int ch = 0; ch < num_channels(); ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        lower_gain * SampleValueForChannel(ch));
+      }
+    }
+  }
+}
+
+TEST(AudioSamplesScaler, UpwardsClamping) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  constexpr float kGain = 10.f;
+  constexpr float kMaxClampedSampleValue = 32767.f;
+  static_assert(kGain > 1.f, "");
+
+  AudioSamplesScaler scaler(kGain);
+
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+      test::FillBufferChannel(
+          kMaxClampedSampleValue - audio_buffer.num_channels() + 1.f + ch, ch,
+          audio_buffer);
+    }
+
+    scaler.Process(audio_buffer);
+    for (int ch = 0; ch < kNumChannels; ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        kMaxClampedSampleValue);
+      }
+    }
+  }
+}
+
+TEST(AudioSamplesScaler, DownwardsClamping) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kNumChannels = 1;
+  constexpr float kGain = 10.f;
+  constexpr float kMinClampedSampleValue = -32768.f;
+  static_assert(kGain > 1.f, "");
+
+  AudioSamplesScaler scaler(kGain);
+
+  AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz,
+                           kNumChannels, kSampleRateHz, kNumChannels);
+
+  for (int frame = 0; frame < kNumFramesToProcess; ++frame) {
+    for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+      test::FillBufferChannel(
+          kMinClampedSampleValue + audio_buffer.num_channels() - 1.f + ch, ch,
+          audio_buffer);
+    }
+
+    scaler.Process(audio_buffer);
+    for (int ch = 0; ch < kNumChannels; ++ch) {
+      for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i],
+                        kMinClampedSampleValue);
+      }
+    }
+  }
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc
new file mode 100644
index 0000000000..dfda582915
--- /dev/null
+++
b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h" + +#include "modules/audio_processing/audio_buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace { + +constexpr int kMinAnalogMicGainLevel = 0; +constexpr int kMaxAnalogMicGainLevel = 255; + +float ComputeLevelBasedGain(int emulated_analog_mic_gain_level) { + static_assert( + kMinAnalogMicGainLevel == 0, + "The minimum gain level must be 0 for the maths below to work."); + static_assert(kMaxAnalogMicGainLevel > 0, + "The minimum gain level must be larger than 0 for the maths " + "below to work."); + constexpr float kGainToLevelMultiplier = 1.f / kMaxAnalogMicGainLevel; + + RTC_DCHECK_GE(emulated_analog_mic_gain_level, kMinAnalogMicGainLevel); + RTC_DCHECK_LE(emulated_analog_mic_gain_level, kMaxAnalogMicGainLevel); + return kGainToLevelMultiplier * emulated_analog_mic_gain_level; +} + +float ComputePreGain(float pre_gain, + int emulated_analog_mic_gain_level, + bool emulated_analog_mic_gain_enabled) { + return emulated_analog_mic_gain_enabled + ? pre_gain * ComputeLevelBasedGain(emulated_analog_mic_gain_level) + : pre_gain; +} + +} // namespace + +CaptureLevelsAdjuster::CaptureLevelsAdjuster( + bool emulated_analog_mic_gain_enabled, + int emulated_analog_mic_gain_level, + float pre_gain, + float post_gain) + : emulated_analog_mic_gain_enabled_(emulated_analog_mic_gain_enabled), + emulated_analog_mic_gain_level_(emulated_analog_mic_gain_level), + pre_gain_(pre_gain), + pre_adjustment_gain_(ComputePreGain(pre_gain_, + emulated_analog_mic_gain_level_, + emulated_analog_mic_gain_enabled_)), + pre_scaler_(pre_adjustment_gain_), + post_scaler_(post_gain) {} + +void CaptureLevelsAdjuster::ApplyPreLevelAdjustment(AudioBuffer& audio_buffer) { + pre_scaler_.Process(audio_buffer); +} + +void CaptureLevelsAdjuster::ApplyPostLevelAdjustment( + AudioBuffer& audio_buffer) { + post_scaler_.Process(audio_buffer); +} + +void CaptureLevelsAdjuster::SetPreGain(float pre_gain) { + pre_gain_ = pre_gain; + UpdatePreAdjustmentGain(); +} + +void CaptureLevelsAdjuster::SetPostGain(float post_gain) { + post_scaler_.SetGain(post_gain); +} + +void CaptureLevelsAdjuster::SetAnalogMicGainLevel(int level) { + RTC_DCHECK_GE(level, kMinAnalogMicGainLevel); + RTC_DCHECK_LE(level, kMaxAnalogMicGainLevel); + int clamped_level = + rtc::SafeClamp(level, kMinAnalogMicGainLevel, kMaxAnalogMicGainLevel); + + emulated_analog_mic_gain_level_ = clamped_level; + UpdatePreAdjustmentGain(); +} + +void CaptureLevelsAdjuster::UpdatePreAdjustmentGain() { + pre_adjustment_gain_ = + ComputePreGain(pre_gain_, emulated_analog_mic_gain_level_, + emulated_analog_mic_gain_enabled_); + pre_scaler_.SetGain(pre_adjustment_gain_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h new file mode 
100644
index 0000000000..38b68ad06c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
+#define MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_
+
+#include <stddef.h>
+
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.h"
+
+namespace webrtc {
+
+// Adjusts the level of the capture signal before and after all capture-side
+// processing is done, using a combination of explicitly specified gains and
+// an emulated analog gain functionality where a specified analog level
+// results in an additional gain. The pre-adjustment is achieved by combining
+// the gain value `pre_gain` and the level `emulated_analog_mic_gain_level` to
+// form a combined gain of `pre_gain`*`emulated_analog_mic_gain_level`/255 by
+// which each sample is multiplied. For example, with `pre_gain` 2.0 and a
+// level of 128, the resulting pre-adjustment gain is roughly 1.0. The
+// `emulated_analog_mic_gain_level` is intended to be controlled by the analog
+// AGC functionality, producing an emulated analog mic gain equal to
+// `emulated_analog_mic_gain_level`/255. The post-level adjustment is achieved
+// by multiplying each sample with the value of `post_gain`. Any changes in
+// the gains are applied smoothly over one frame, and the scaled samples are
+// clamped to fit into the allowed S16 sample range.
+class CaptureLevelsAdjuster {
+ public:
+  // C-tor. The values for the level and the gains must fulfill
+  // 0 <= emulated_analog_mic_gain_level <= 255.
+  // 0.f <= pre_gain.
+  // 0.f <= post_gain.
+  CaptureLevelsAdjuster(bool emulated_analog_mic_gain_enabled,
+                        int emulated_analog_mic_gain_level,
+                        float pre_gain,
+                        float post_gain);
+  CaptureLevelsAdjuster(const CaptureLevelsAdjuster&) = delete;
+  CaptureLevelsAdjuster& operator=(const CaptureLevelsAdjuster&) = delete;
+
+  // Adjusts the level of the signal. This should be called before any of the
+  // other processing is performed.
+  void ApplyPreLevelAdjustment(AudioBuffer& audio_buffer);
+
+  // Adjusts the level of the signal. This should be called after all of the
+  // other processing has been performed.
+  void ApplyPostLevelAdjustment(AudioBuffer& audio_buffer);
+
+  // Sets the gain to apply to each sample before any of the other processing
+  // is performed.
+  void SetPreGain(float pre_gain);
+
+  // Returns the total pre-adjustment gain applied to each sample before any
+  // of the other processing is performed, comprising both the pre_gain and
+  // the gain from the emulated analog mic.
+  float GetPreAdjustmentGain() const { return pre_adjustment_gain_; }
+
+  // Sets the gain to apply to each sample after all of the other processing
+  // has been performed.
+  void SetPostGain(float post_gain);
+
+  // Sets the analog gain level to use for the emulated analog gain.
+  // `level` must be in the range [0...255].
+  void SetAnalogMicGainLevel(int level);
+
+  // Returns the current analog gain level used for the emulated analog gain.
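+  // This is the level set at construction or by the most recent call to
+  // SetAnalogMicGainLevel(), which clamps its argument to [0, 255].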
+ int GetAnalogMicGainLevel() const { return emulated_analog_mic_gain_level_; } + + private: + // Updates the value of `pre_adjustment_gain_` based on the supplied values + // for `pre_gain` and `emulated_analog_mic_gain_level_`. + void UpdatePreAdjustmentGain(); + + const bool emulated_analog_mic_gain_enabled_; + int emulated_analog_mic_gain_level_; + float pre_gain_; + float pre_adjustment_gain_; + AudioSamplesScaler pre_scaler_; + AudioSamplesScaler post_scaler_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_CAPTURE_LEVELS_ADJUSTER_CAPTURE_LEVELS_ADJUSTER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_gn/moz.build new file mode 100644 index 0000000000..3e57c48b6f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/audio_samples_scaler.cc", + "/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + 
DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("capture_levels_adjuster_gn") diff --git 
a/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_unittest.cc b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_unittest.cc new file mode 100644 index 0000000000..1183441a14 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster_unittest.cc @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/capture_levels_adjuster/capture_levels_adjuster.h" + +#include +#include + +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +float SampleValueForChannel(int channel) { + constexpr float kSampleBaseValue = 100.f; + constexpr float kSampleChannelOffset = 1.f; + return kSampleBaseValue + channel * kSampleChannelOffset; +} + +void PopulateBuffer(AudioBuffer& audio_buffer) { + for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) { + test::FillBufferChannel(SampleValueForChannel(ch), ch, audio_buffer); + } +} + +float ComputeExpectedSignalGainAfterApplyPreLevelAdjustment( + bool emulated_analog_mic_gain_enabled, + int emulated_analog_mic_gain_level, + float pre_gain) { + if (!emulated_analog_mic_gain_enabled) { + return pre_gain; + } + return pre_gain * std::min(emulated_analog_mic_gain_level, 255) / 255.f; +} + +float ComputeExpectedSignalGainAfterApplyPostLevelAdjustment( + bool emulated_analog_mic_gain_enabled, + int emulated_analog_mic_gain_level, + float pre_gain, + float post_gain) { + return post_gain * ComputeExpectedSignalGainAfterApplyPreLevelAdjustment( + emulated_analog_mic_gain_enabled, + emulated_analog_mic_gain_level, pre_gain); +} + +constexpr int kNumFramesToProcess = 10; + +class CaptureLevelsAdjusterTest + : public ::testing::Test, + public ::testing::WithParamInterface< + std::tuple> { + protected: + int sample_rate_hz() const { return std::get<0>(GetParam()); } + int num_channels() const { return std::get<1>(GetParam()); } + bool emulated_analog_mic_gain_enabled() const { + return std::get<2>(GetParam()); + } + int emulated_analog_mic_gain_level() const { return std::get<3>(GetParam()); } + float pre_gain() const { return std::get<4>(GetParam()); } + float post_gain() const { return std::get<5>(GetParam()); } +}; + +INSTANTIATE_TEST_SUITE_P( + CaptureLevelsAdjusterTestSuite, + CaptureLevelsAdjusterTest, + ::testing::Combine(::testing::Values(16000, 32000, 48000), + ::testing::Values(1, 2, 4), + ::testing::Values(false, true), + ::testing::Values(21, 255), + ::testing::Values(0.1f, 1.f, 4.f), + ::testing::Values(0.1f, 1.f, 4.f))); + +TEST_P(CaptureLevelsAdjusterTest, InitialGainIsInstantlyAchieved) { + CaptureLevelsAdjuster adjuster(emulated_analog_mic_gain_enabled(), + emulated_analog_mic_gain_level(), pre_gain(), + post_gain()); + + AudioBuffer audio_buffer(sample_rate_hz(), num_channels(), sample_rate_hz(), + num_channels(), sample_rate_hz(), num_channels()); + + const float expected_signal_gain_after_pre_gain = + ComputeExpectedSignalGainAfterApplyPreLevelAdjustment( + 
emulated_analog_mic_gain_enabled(), emulated_analog_mic_gain_level(), + pre_gain()); + const float expected_signal_gain_after_post_level_adjustment = + ComputeExpectedSignalGainAfterApplyPostLevelAdjustment( + emulated_analog_mic_gain_enabled(), emulated_analog_mic_gain_level(), + pre_gain(), post_gain()); + + for (int frame = 0; frame < kNumFramesToProcess; ++frame) { + PopulateBuffer(audio_buffer); + adjuster.ApplyPreLevelAdjustment(audio_buffer); + EXPECT_FLOAT_EQ(adjuster.GetPreAdjustmentGain(), + expected_signal_gain_after_pre_gain); + + for (int ch = 0; ch < num_channels(); ++ch) { + for (size_t i = 0; i < audio_buffer.num_frames(); ++i) { + EXPECT_FLOAT_EQ( + audio_buffer.channels_const()[ch][i], + expected_signal_gain_after_pre_gain * SampleValueForChannel(ch)); + } + } + adjuster.ApplyPostLevelAdjustment(audio_buffer); + for (int ch = 0; ch < num_channels(); ++ch) { + for (size_t i = 0; i < audio_buffer.num_frames(); ++i) { + EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i], + expected_signal_gain_after_post_level_adjustment * + SampleValueForChannel(ch)); + } + } + } +} + +TEST_P(CaptureLevelsAdjusterTest, NewGainsAreAchieved) { + const int lower_emulated_analog_mic_gain_level = + emulated_analog_mic_gain_level(); + const float lower_pre_gain = pre_gain(); + const float lower_post_gain = post_gain(); + const int higher_emulated_analog_mic_gain_level = + std::min(lower_emulated_analog_mic_gain_level * 2, 255); + const float higher_pre_gain = lower_pre_gain * 2.f; + const float higher_post_gain = lower_post_gain * 2.f; + + CaptureLevelsAdjuster adjuster(emulated_analog_mic_gain_enabled(), + lower_emulated_analog_mic_gain_level, + lower_pre_gain, lower_post_gain); + + AudioBuffer audio_buffer(sample_rate_hz(), num_channels(), sample_rate_hz(), + num_channels(), sample_rate_hz(), num_channels()); + + const float expected_signal_gain_after_pre_gain = + ComputeExpectedSignalGainAfterApplyPreLevelAdjustment( + emulated_analog_mic_gain_enabled(), + higher_emulated_analog_mic_gain_level, higher_pre_gain); + const float expected_signal_gain_after_post_level_adjustment = + ComputeExpectedSignalGainAfterApplyPostLevelAdjustment( + emulated_analog_mic_gain_enabled(), + higher_emulated_analog_mic_gain_level, higher_pre_gain, + higher_post_gain); + + adjuster.SetPreGain(higher_pre_gain); + adjuster.SetPostGain(higher_post_gain); + adjuster.SetAnalogMicGainLevel(higher_emulated_analog_mic_gain_level); + + PopulateBuffer(audio_buffer); + adjuster.ApplyPreLevelAdjustment(audio_buffer); + adjuster.ApplyPostLevelAdjustment(audio_buffer); + EXPECT_EQ(adjuster.GetAnalogMicGainLevel(), + higher_emulated_analog_mic_gain_level); + + for (int frame = 1; frame < kNumFramesToProcess; ++frame) { + PopulateBuffer(audio_buffer); + adjuster.ApplyPreLevelAdjustment(audio_buffer); + EXPECT_FLOAT_EQ(adjuster.GetPreAdjustmentGain(), + expected_signal_gain_after_pre_gain); + for (int ch = 0; ch < num_channels(); ++ch) { + for (size_t i = 0; i < audio_buffer.num_frames(); ++i) { + EXPECT_FLOAT_EQ( + audio_buffer.channels_const()[ch][i], + expected_signal_gain_after_pre_gain * SampleValueForChannel(ch)); + } + } + + adjuster.ApplyPostLevelAdjustment(audio_buffer); + for (int ch = 0; ch < num_channels(); ++ch) { + for (size_t i = 0; i < audio_buffer.num_frames(); ++i) { + EXPECT_FLOAT_EQ(audio_buffer.channels_const()[ch][i], + expected_signal_gain_after_post_level_adjustment * + SampleValueForChannel(ch)); + } + } + + EXPECT_EQ(adjuster.GetAnalogMicGainLevel(), + higher_emulated_analog_mic_gain_level); + } +} + +} 
// namespace
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/debug.proto b/third_party/libwebrtc/modules/audio_processing/debug.proto
new file mode 100644
index 0000000000..cc5efbc73c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/debug.proto
@@ -0,0 +1,115 @@
+syntax = "proto2";
+option optimize_for = LITE_RUNTIME;
+package webrtc.audioproc;
+
+// Contains the format of input/output/reverse audio. An Init message is added
+// when any of the fields are changed.
+message Init {
+  optional int32 sample_rate = 1;
+  optional int32 device_sample_rate = 2 [deprecated=true];
+  optional int32 num_input_channels = 3;
+  optional int32 num_output_channels = 4;
+  optional int32 num_reverse_channels = 5;
+  optional int32 reverse_sample_rate = 6;
+  optional int32 output_sample_rate = 7;
+  optional int32 reverse_output_sample_rate = 8;
+  optional int32 num_reverse_output_channels = 9;
+  optional int64 timestamp_ms = 10;
+}
+
+// May contain interleaved or deinterleaved data, but don't store both formats.
+message ReverseStream {
+  // int16 interleaved data.
+  optional bytes data = 1;
+
+  // float deinterleaved data, where each repeated element points to a single
+  // channel buffer of data.
+  repeated bytes channel = 2;
+}
+
+// May contain interleaved or deinterleaved data, but don't store both formats.
+message Stream {
+  // int16 interleaved data.
+  optional bytes input_data = 1;
+  optional bytes output_data = 2;
+
+  optional int32 delay = 3;
+  optional sint32 drift = 4;
+  optional int32 applied_input_volume = 5;
+  optional bool keypress = 6;
+
+  // float deinterleaved data, where each repeated element points to a single
+  // channel buffer of data.
+  repeated bytes input_channel = 7;
+  repeated bytes output_channel = 8;
+}
+
+// Contains the configurations of various APM components. A Config message is
+// added when any of the fields are changed.
+message Config {
+  // Acoustic echo canceler.
+  optional bool aec_enabled = 1;
+  optional bool aec_delay_agnostic_enabled = 2;
+  optional bool aec_drift_compensation_enabled = 3;
+  optional bool aec_extended_filter_enabled = 4;
+  optional int32 aec_suppression_level = 5;
+  // Mobile AEC.
+  optional bool aecm_enabled = 6;
+  optional bool aecm_comfort_noise_enabled = 7 [deprecated = true];
+  optional int32 aecm_routing_mode = 8 [deprecated = true];
+  // Automatic gain controller.
+  optional bool agc_enabled = 9;
+  optional int32 agc_mode = 10;
+  optional bool agc_limiter_enabled = 11;
+  optional bool noise_robust_agc_enabled = 12;
+  // High pass filter.
+  optional bool hpf_enabled = 13;
+  // Noise suppression.
+  optional bool ns_enabled = 14;
+  optional int32 ns_level = 15;
+  // Transient suppression.
+  optional bool transient_suppression_enabled = 16;
+  // Semicolon-separated string containing experimental feature
+  // descriptions.
+  optional string experiments_description = 17;
+  reserved 18;  // Intelligibility enhancer enabled (deprecated).
+  // Pre amplifier.
+  optional bool pre_amplifier_enabled = 19;
+  optional float pre_amplifier_fixed_gain_factor = 20;
+
+  // Next field number 21.
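+  // Note that a Config entry is only logged when one of the fields above
+  // changes, so a reader of the debug dump has to carry the most recently
+  // seen Config forward until the next one arrives.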
+}
+
+message PlayoutAudioDeviceInfo {
+  optional int32 id = 1;
+  optional int32 max_volume = 2;
+}
+
+message RuntimeSetting {
+  optional float capture_pre_gain = 1;
+  optional float custom_render_processing_setting = 2;
+  optional float capture_fixed_post_gain = 3;
+  optional int32 playout_volume_change = 4;
+  optional PlayoutAudioDeviceInfo playout_audio_device_change = 5;
+  optional bool capture_output_used = 6;
+  optional float capture_post_gain = 7;
+}
+
+message Event {
+  enum Type {
+    INIT = 0;
+    REVERSE_STREAM = 1;
+    STREAM = 2;
+    CONFIG = 3;
+    UNKNOWN_EVENT = 4;
+    RUNTIME_SETTING = 5;
+  }
+
+  required Type type = 1;
+
+  optional Init init = 2;
+  optional ReverseStream reverse_stream = 3;
+  optional Stream stream = 4;
+  optional Config config = 5;
+  optional RuntimeSetting runtime_setting = 6;
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc
new file mode 100644
index 0000000000..f351811e08
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/echo_control_mobile_impl.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+// TODO(peah): Increase the number of frames to process when the issue of
+// non-repeatable test results has been found.
+const int kNumFramesToProcess = 200;
+
+void SetupComponent(int sample_rate_hz,
+                    EchoControlMobileImpl::RoutingMode routing_mode,
+                    bool comfort_noise_enabled,
+                    EchoControlMobileImpl* echo_control_mobile) {
+  echo_control_mobile->Initialize(
+      sample_rate_hz > 16000 ? 16000 : sample_rate_hz, 1, 1);
+  echo_control_mobile->set_routing_mode(routing_mode);
+  echo_control_mobile->enable_comfort_noise(comfort_noise_enabled);
+}
+
+void ProcessOneFrame(int sample_rate_hz,
+                     int stream_delay_ms,
+                     AudioBuffer* render_audio_buffer,
+                     AudioBuffer* capture_audio_buffer,
+                     EchoControlMobileImpl* echo_control_mobile) {
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    render_audio_buffer->SplitIntoFrequencyBands();
+    capture_audio_buffer->SplitIntoFrequencyBands();
+  }
+
+  std::vector render_audio;
+  EchoControlMobileImpl::PackRenderAudioBuffer(
+      render_audio_buffer, 1, render_audio_buffer->num_channels(),
+      &render_audio);
+  echo_control_mobile->ProcessRenderAudio(render_audio);
+
+  echo_control_mobile->ProcessCaptureAudio(capture_audio_buffer,
+                                           stream_delay_ms);
+
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    capture_audio_buffer->MergeFrequencyBands();
+  }
+}
+
+void RunBitexactnessTest(int sample_rate_hz,
+                         size_t num_channels,
+                         int stream_delay_ms,
+                         EchoControlMobileImpl::RoutingMode routing_mode,
+                         bool comfort_noise_enabled,
+                         const rtc::ArrayView& output_reference) {
+  EchoControlMobileImpl echo_control_mobile;
+  SetupComponent(sample_rate_hz, routing_mode, comfort_noise_enabled,
+                 &echo_control_mobile);
+
+  const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
+  const StreamConfig render_config(sample_rate_hz, num_channels);
+  AudioBuffer render_buffer(
+      render_config.sample_rate_hz(), render_config.num_channels(),
+      render_config.sample_rate_hz(), 1, render_config.sample_rate_hz(), 1);
+  test::InputAudioFile render_file(
+      test::GetApmRenderTestVectorFileName(sample_rate_hz));
+  std::vector render_input(samples_per_channel * num_channels);
+
+  const StreamConfig capture_config(sample_rate_hz, num_channels);
+  AudioBuffer capture_buffer(
+      capture_config.sample_rate_hz(), capture_config.num_channels(),
+      capture_config.sample_rate_hz(), 1, capture_config.sample_rate_hz(), 1);
+  test::InputAudioFile capture_file(
+      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+  std::vector capture_input(samples_per_channel * num_channels);
+
+  for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &render_file, render_input);
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &capture_file, capture_input);
+
+    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, stream_delay_ms, &render_buffer,
+                    &capture_buffer, &echo_control_mobile);
+  }
+
+  // Extract and verify the test results.
+  std::vector capture_output;
+  test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
+                                     &capture_output);
+
+  // Compare the output with the reference. Only the first values of the
+  // output from the last processed frame are compared, to avoid having to
+  // specify all preceding frames as test vectors. As the algorithm being
+  // tested has a memory, testing only the last frame implicitly also tests
+  // the preceding frames.
+  const float kElementErrorBound = 1.0f / 32768.0f;
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      capture_config.num_frames(), capture_config.num_channels(),
+      output_reference, capture_output, kElementErrorBound));
+}
+
+}  // namespace
+
+// TODO(peah): Re-enable once the integer overflow issue in aecm_core.c:932:69
+// has been solved.
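+// The reference comparisons below rely on the kElementErrorBound defined in
+// RunBitexactnessTest() above: 1.0f / 32768.0f is one quantization step of
+// 16-bit PCM mapped onto the [-1.0, 1.0) float range, so at most one LSB of
+// deviation per sample is tolerated.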
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono8kHz_LoudSpeakerPhone_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.005280f, 0.002380f, -0.000427f};
+
+  RunBitexactnessTest(8000, 1, 0,
+                      EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone,
+                      true, kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono16kHz_LoudSpeakerPhone_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.003601f, 0.002991f, 0.001923f};
+  RunBitexactnessTest(16000, 1, 0,
+                      EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone,
+                      true, kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono32kHz_LoudSpeakerPhone_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.002258f, 0.002899f, 0.003906f};
+
+  RunBitexactnessTest(32000, 1, 0,
+                      EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone,
+                      true, kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono48kHz_LoudSpeakerPhone_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {-0.000046f, 0.000041f, 0.000249f};
+
+  RunBitexactnessTest(48000, 1, 0,
+                      EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone,
+                      true, kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono16kHz_LoudSpeakerPhone_CngOff_StreamDelay0) {
+  const float kOutputReference[] = {0.000000f, 0.000000f, 0.000000f};
+
+  RunBitexactnessTest(16000, 1, 0,
+                      EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone,
+                      false, kOutputReference);
+}
+
+// TODO(peah): Re-enable once the integer overflow issue in aecm_core.c:932:69
+// has been solved.
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono16kHz_LoudSpeakerPhone_CngOn_StreamDelay5) {
+  const float kOutputReference[] = {0.003693f, 0.002930f, 0.001801f};
+
+  RunBitexactnessTest(16000, 1, 5,
+                      EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone,
+                      true, kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     Mono16kHz_LoudSpeakerPhone_CngOn_StreamDelay10) {
+  const float kOutputReference[] = {-0.002380f, -0.002533f, -0.002563f};
+
+  RunBitexactnessTest(16000, 1, 10,
+                      EchoControlMobileImpl::RoutingMode::kLoudSpeakerphone,
+                      true, kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono16kHz_QuietEarpieceOrHeadset_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.000397f, 0.000000f, -0.000305f};
+
+  RunBitexactnessTest(
+      16000, 1, 0, EchoControlMobileImpl::RoutingMode::kQuietEarpieceOrHeadset,
+      true, kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono16kHz_Earpiece_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.002167f, 0.001617f, 0.001038f};
+
+  RunBitexactnessTest(16000, 1, 0,
+                      EchoControlMobileImpl::RoutingMode::kEarpiece, true,
+                      kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono16kHz_LoudEarpiece_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.003540f, 0.002899f, 0.001862f};
+
+  RunBitexactnessTest(16000, 1, 0,
+                      EchoControlMobileImpl::RoutingMode::kLoudEarpiece, true,
+                      kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono16kHz_SpeakerPhone_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.003632f, 0.003052f, 0.001984f};
+
+  RunBitexactnessTest(16000, 1, 0,
+                      EchoControlMobileImpl::RoutingMode::kSpeakerphone, true,
+                      kOutputReference);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc
new file mode 100644
index 0000000000..fa5cb8ffec --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.cc @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_control_mobile_impl.h" + +#include + +#include + +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { +int16_t MapSetting(EchoControlMobileImpl::RoutingMode mode) { + switch (mode) { + case EchoControlMobileImpl::kQuietEarpieceOrHeadset: + return 0; + case EchoControlMobileImpl::kEarpiece: + return 1; + case EchoControlMobileImpl::kLoudEarpiece: + return 2; + case EchoControlMobileImpl::kSpeakerphone: + return 3; + case EchoControlMobileImpl::kLoudSpeakerphone: + return 4; + } + RTC_DCHECK_NOTREACHED(); + return -1; +} + +AudioProcessing::Error MapError(int err) { + switch (err) { + case AECM_UNSUPPORTED_FUNCTION_ERROR: + return AudioProcessing::kUnsupportedFunctionError; + case AECM_NULL_POINTER_ERROR: + return AudioProcessing::kNullPointerError; + case AECM_BAD_PARAMETER_ERROR: + return AudioProcessing::kBadParameterError; + case AECM_BAD_PARAMETER_WARNING: + return AudioProcessing::kBadStreamParameterWarning; + default: + // AECM_UNSPECIFIED_ERROR + // AECM_UNINITIALIZED_ERROR + return AudioProcessing::kUnspecifiedError; + } +} + +} // namespace + +struct EchoControlMobileImpl::StreamProperties { + StreamProperties() = delete; + StreamProperties(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels) + : sample_rate_hz(sample_rate_hz), + num_reverse_channels(num_reverse_channels), + num_output_channels(num_output_channels) {} + + int sample_rate_hz; + size_t num_reverse_channels; + size_t num_output_channels; +}; + +class EchoControlMobileImpl::Canceller { + public: + Canceller() { + state_ = WebRtcAecm_Create(); + RTC_CHECK(state_); + } + + ~Canceller() { + RTC_DCHECK(state_); + WebRtcAecm_Free(state_); + } + + Canceller(const Canceller&) = delete; + Canceller& operator=(const Canceller&) = delete; + + void* state() { + RTC_DCHECK(state_); + return state_; + } + + void Initialize(int sample_rate_hz) { + RTC_DCHECK(state_); + int error = WebRtcAecm_Init(state_, sample_rate_hz); + RTC_DCHECK_EQ(AudioProcessing::kNoError, error); + } + + private: + void* state_; +}; + +EchoControlMobileImpl::EchoControlMobileImpl() + : routing_mode_(kSpeakerphone), comfort_noise_enabled_(false) {} + +EchoControlMobileImpl::~EchoControlMobileImpl() {} + +void EchoControlMobileImpl::ProcessRenderAudio( + rtc::ArrayView packed_render_audio) { + RTC_DCHECK(stream_properties_); + + size_t buffer_index = 0; + size_t num_frames_per_band = + packed_render_audio.size() / (stream_properties_->num_output_channels * + stream_properties_->num_reverse_channels); + + for (auto& canceller : cancellers_) { + WebRtcAecm_BufferFarend(canceller->state(), + &packed_render_audio[buffer_index], + num_frames_per_band); + + buffer_index += num_frames_per_band; + } +} + +void EchoControlMobileImpl::PackRenderAudioBuffer( + 
const AudioBuffer* audio, + size_t num_output_channels, + size_t num_channels, + std::vector* packed_buffer) { + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + RTC_DCHECK_EQ(num_channels, audio->num_channels()); + + // The ordering convention must be followed to pass to the correct AECM. + packed_buffer->clear(); + int render_channel = 0; + for (size_t i = 0; i < num_output_channels; i++) { + for (size_t j = 0; j < audio->num_channels(); j++) { + std::array data_to_buffer; + FloatS16ToS16(audio->split_bands_const(render_channel)[kBand0To8kHz], + audio->num_frames_per_band(), data_to_buffer.data()); + + // Buffer the samples in the render queue. + packed_buffer->insert( + packed_buffer->end(), data_to_buffer.data(), + data_to_buffer.data() + audio->num_frames_per_band()); + render_channel = (render_channel + 1) % audio->num_channels(); + } + } +} + +size_t EchoControlMobileImpl::NumCancellersRequired( + size_t num_output_channels, + size_t num_reverse_channels) { + return num_output_channels * num_reverse_channels; +} + +int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, + int stream_delay_ms) { + RTC_DCHECK(stream_properties_); + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_EQ(audio->num_channels(), stream_properties_->num_output_channels); + RTC_DCHECK_GE(cancellers_.size(), stream_properties_->num_reverse_channels * + audio->num_channels()); + + int err = AudioProcessing::kNoError; + + // The ordering convention must be followed to pass to the correct AECM. + size_t handle_index = 0; + for (size_t capture = 0; capture < audio->num_channels(); ++capture) { + // TODO(ajm): improve how this works, possibly inside AECM. + // This is kind of hacked up. + RTC_DCHECK_LT(capture, low_pass_reference_.size()); + const int16_t* noisy = + reference_copied_ ? 
low_pass_reference_[capture].data() : nullptr; + + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, + audio->num_frames_per_band()); + + std::array split_bands_data; + int16_t* split_bands = split_bands_data.data(); + const int16_t* clean = split_bands_data.data(); + if (audio->split_bands(capture)[kBand0To8kHz]) { + FloatS16ToS16(audio->split_bands(capture)[kBand0To8kHz], + audio->num_frames_per_band(), split_bands_data.data()); + } else { + clean = nullptr; + split_bands = nullptr; + } + + if (noisy == NULL) { + noisy = clean; + clean = NULL; + } + for (size_t render = 0; render < stream_properties_->num_reverse_channels; + ++render) { + err = WebRtcAecm_Process(cancellers_[handle_index]->state(), noisy, clean, + split_bands, audio->num_frames_per_band(), + stream_delay_ms); + + if (split_bands) { + S16ToFloatS16(split_bands, audio->num_frames_per_band(), + audio->split_bands(capture)[kBand0To8kHz]); + } + + if (err != AudioProcessing::kNoError) { + return MapError(err); + } + + ++handle_index; + } + for (size_t band = 1u; band < audio->num_bands(); ++band) { + memset(audio->split_bands_f(capture)[band], 0, + audio->num_frames_per_band() * + sizeof(audio->split_bands_f(capture)[band][0])); + } + } + return AudioProcessing::kNoError; +} + +int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) { + if (MapSetting(mode) == -1) { + return AudioProcessing::kBadParameterError; + } + routing_mode_ = mode; + return Configure(); +} + +EchoControlMobileImpl::RoutingMode EchoControlMobileImpl::routing_mode() const { + return routing_mode_; +} + +int EchoControlMobileImpl::enable_comfort_noise(bool enable) { + comfort_noise_enabled_ = enable; + return Configure(); +} + +bool EchoControlMobileImpl::is_comfort_noise_enabled() const { + return comfort_noise_enabled_; +} + +void EchoControlMobileImpl::Initialize(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels) { + low_pass_reference_.resize(num_output_channels); + for (auto& reference : low_pass_reference_) { + reference.fill(0); + } + + stream_properties_.reset(new StreamProperties( + sample_rate_hz, num_reverse_channels, num_output_channels)); + + // AECM only supports 16 kHz or lower sample rates. + RTC_DCHECK_LE(stream_properties_->sample_rate_hz, + AudioProcessing::kSampleRate16kHz); + + cancellers_.resize( + NumCancellersRequired(stream_properties_->num_output_channels, + stream_properties_->num_reverse_channels)); + + for (auto& canceller : cancellers_) { + if (!canceller) { + canceller.reset(new Canceller()); + } + canceller->Initialize(sample_rate_hz); + } + Configure(); +} + +int EchoControlMobileImpl::Configure() { + AecmConfig config; + config.cngMode = comfort_noise_enabled_; + config.echoMode = MapSetting(routing_mode_); + int error = AudioProcessing::kNoError; + for (auto& canceller : cancellers_) { + int handle_error = WebRtcAecm_set_config(canceller->state(), config); + if (handle_error != AudioProcessing::kNoError) { + error = handle_error; + } + } + return error; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.h b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.h new file mode 100644 index 0000000000..f7f2626a0e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_impl.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ + +#include +#include + +#include +#include + +#include "api/array_view.h" + +namespace webrtc { + +class AudioBuffer; + +// The acoustic echo control for mobile (AECM) component is a low complexity +// robust option intended for use on mobile devices. +class EchoControlMobileImpl { + public: + EchoControlMobileImpl(); + + ~EchoControlMobileImpl(); + + // Recommended settings for particular audio routes. In general, the louder + // the echo is expected to be, the higher this value should be set. The + // preferred setting may vary from device to device. + enum RoutingMode { + kQuietEarpieceOrHeadset, + kEarpiece, + kLoudEarpiece, + kSpeakerphone, + kLoudSpeakerphone + }; + + // Sets echo control appropriate for the audio routing `mode` on the device. + // It can and should be updated during a call if the audio routing changes. + int set_routing_mode(RoutingMode mode); + RoutingMode routing_mode() const; + + // Comfort noise replaces suppressed background noise to maintain a + // consistent signal level. + int enable_comfort_noise(bool enable); + bool is_comfort_noise_enabled() const; + + void ProcessRenderAudio(rtc::ArrayView packed_render_audio); + int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms); + + void Initialize(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels); + + static void PackRenderAudioBuffer(const AudioBuffer* audio, + size_t num_output_channels, + size_t num_channels, + std::vector* packed_buffer); + + static size_t NumCancellersRequired(size_t num_output_channels, + size_t num_reverse_channels); + + private: + class Canceller; + struct StreamProperties; + + int Configure(); + + RoutingMode routing_mode_; + bool comfort_noise_enabled_; + + std::vector> cancellers_; + std::unique_ptr stream_properties_; + std::vector> low_pass_reference_; + bool reference_copied_ = false; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_unittest.cc new file mode 100644 index 0000000000..ed0393043c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_control_mobile_unittest.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include + +#include "modules/audio_processing/echo_control_mobile_impl.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "test/gtest.h" + +namespace webrtc { +TEST(EchoControlMobileTest, InterfaceConfiguration) { + EchoControlMobileImpl aecm; + aecm.Initialize(AudioProcessing::kSampleRate16kHz, 2, 2); + + // Toggle routing modes + std::array routing_modes = { + EchoControlMobileImpl::kQuietEarpieceOrHeadset, + EchoControlMobileImpl::kEarpiece, + EchoControlMobileImpl::kLoudEarpiece, + EchoControlMobileImpl::kSpeakerphone, + EchoControlMobileImpl::kLoudSpeakerphone, + }; + for (auto mode : routing_modes) { + EXPECT_EQ(0, aecm.set_routing_mode(mode)); + EXPECT_EQ(mode, aecm.routing_mode()); + } + + // Turn comfort noise off/on + EXPECT_EQ(0, aecm.enable_comfort_noise(false)); + EXPECT_FALSE(aecm.is_comfort_noise_enabled()); + EXPECT_EQ(0, aecm.enable_comfort_noise(true)); + EXPECT_TRUE(aecm.is_comfort_noise_enabled()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.cc new file mode 100644 index 0000000000..a6d10edfe2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/circular_buffer.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { + +CircularBuffer::CircularBuffer(size_t size) : buffer_(size) {} +CircularBuffer::~CircularBuffer() = default; + +void CircularBuffer::Push(float value) { + buffer_[next_insertion_index_] = value; + ++next_insertion_index_; + next_insertion_index_ %= buffer_.size(); + RTC_DCHECK_LT(next_insertion_index_, buffer_.size()); + nr_elements_in_buffer_ = std::min(nr_elements_in_buffer_ + 1, buffer_.size()); + RTC_DCHECK_LE(nr_elements_in_buffer_, buffer_.size()); +} + +absl::optional CircularBuffer::Pop() { + if (nr_elements_in_buffer_ == 0) { + return absl::nullopt; + } + const size_t index = + (buffer_.size() + next_insertion_index_ - nr_elements_in_buffer_) % + buffer_.size(); + RTC_DCHECK_LT(index, buffer_.size()); + --nr_elements_in_buffer_; + return buffer_[index]; +} + +void CircularBuffer::Clear() { + std::fill(buffer_.begin(), buffer_.end(), 0.f); + next_insertion_index_ = 0; + nr_elements_in_buffer_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.h b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.h new file mode 100644 index 0000000000..db1aeaebf6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_CIRCULAR_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_CIRCULAR_BUFFER_H_ + +#include + +#include + +#include "absl/types/optional.h" + +namespace webrtc { + +// Ring buffer containing floating point values. +struct CircularBuffer { + public: + explicit CircularBuffer(size_t size); + ~CircularBuffer(); + + void Push(float value); + absl::optional Pop(); + size_t Size() const { return nr_elements_in_buffer_; } + // This function fills the buffer with zeros, but does not change its size. + void Clear(); + + private: + std::vector buffer_; + size_t next_insertion_index_ = 0; + // This is the number of elements that have been pushed into the circular + // buffer, not the allocated buffer size. + size_t nr_elements_in_buffer_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_CIRCULAR_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc new file mode 100644 index 0000000000..7a234d4a55 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/circular_buffer.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(CircularBufferTests, LessThanMaxTest) { + CircularBuffer test_buffer(3); + test_buffer.Push(1.f); + test_buffer.Push(2.f); + EXPECT_EQ(1.f, test_buffer.Pop()); + EXPECT_EQ(2.f, test_buffer.Pop()); +} + +TEST(CircularBufferTests, FillTest) { + CircularBuffer test_buffer(3); + test_buffer.Push(1.f); + test_buffer.Push(2.f); + test_buffer.Push(3.f); + EXPECT_EQ(1.f, test_buffer.Pop()); + EXPECT_EQ(2.f, test_buffer.Pop()); + EXPECT_EQ(3.f, test_buffer.Pop()); +} + +TEST(CircularBufferTests, OverflowTest) { + CircularBuffer test_buffer(3); + test_buffer.Push(1.f); + test_buffer.Push(2.f); + test_buffer.Push(3.f); + test_buffer.Push(4.f); + // Because the circular buffer has a size of 3, the first insert should have + // been forgotten. + EXPECT_EQ(2.f, test_buffer.Pop()); + EXPECT_EQ(3.f, test_buffer.Pop()); + EXPECT_EQ(4.f, test_buffer.Pop()); +} + +TEST(CircularBufferTests, ReadFromEmpty) { + CircularBuffer test_buffer(3); + EXPECT_EQ(absl::nullopt, test_buffer.Pop()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc new file mode 100644 index 0000000000..a9ebb8cd92 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/mean_variance_estimator.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Parameter controlling the adaptation speed. +constexpr float kAlpha = 0.001f; + +} // namespace + +void MeanVarianceEstimator::Update(float value) { + mean_ = (1.f - kAlpha) * mean_ + kAlpha * value; + variance_ = + (1.f - kAlpha) * variance_ + kAlpha * (value - mean_) * (value - mean_); + RTC_DCHECK(std::isfinite(mean_)); + RTC_DCHECK(std::isfinite(variance_)); +} + +float MeanVarianceEstimator::std_deviation() const { + RTC_DCHECK_GE(variance_, 0.f); + return sqrtf(variance_); +} + +float MeanVarianceEstimator::mean() const { + return mean_; +} + +void MeanVarianceEstimator::Clear() { + mean_ = 0.f; + variance_ = 0.f; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h new file mode 100644 index 0000000000..7f793df1e8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MEAN_VARIANCE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MEAN_VARIANCE_ESTIMATOR_H_ + +namespace webrtc { + +// This class iteratively estimates the mean and variance of a signal. +class MeanVarianceEstimator { + public: + void Update(float value); + float std_deviation() const; + float mean() const; + void Clear(); + + private: + // Estimate of the expected value of the input values. + float mean_ = 0.f; + // Estimate of the variance of the input values. + float variance_ = 0.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MEAN_VARIANCE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc new file mode 100644 index 0000000000..8327d23e8a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc @@ -0,0 +1,65 @@ + +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/mean_variance_estimator.h" + +#include "test/gtest.h" + +namespace webrtc { + +TEST(MeanVarianceEstimatorTests, InsertTwoValues) { + MeanVarianceEstimator test_estimator; + // Insert two values. 
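+  // The estimator (see mean_variance_estimator.cc) updates via the
+  // exponential recursion mean = (1 - kAlpha) * mean + kAlpha * x with
+  // kAlpha = 0.001f, so after Update(3.f) and Update(5.f) the mean is
+  // 0.999f * (0.001f * 3.f) + 0.001f * 5.f ~= 0.008f: small but strictly
+  // positive, which is all the EXPECT_GT checks below rely on.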
+ test_estimator.Update(3.f); + test_estimator.Update(5.f); + + EXPECT_GT(test_estimator.mean(), 0.f); + EXPECT_GT(test_estimator.std_deviation(), 0.f); + // Test Clear method + test_estimator.Clear(); + EXPECT_EQ(test_estimator.mean(), 0.f); + EXPECT_EQ(test_estimator.std_deviation(), 0.f); +} + +TEST(MeanVarianceEstimatorTests, InsertZeroes) { + MeanVarianceEstimator test_estimator; + // Insert the same value many times. + for (size_t i = 0; i < 20000; i++) { + test_estimator.Update(0.f); + } + EXPECT_EQ(test_estimator.mean(), 0.f); + EXPECT_EQ(test_estimator.std_deviation(), 0.f); +} + +TEST(MeanVarianceEstimatorTests, ConstantValueTest) { + MeanVarianceEstimator test_estimator; + for (size_t i = 0; i < 20000; i++) { + test_estimator.Update(3.f); + } + // The mean should be close to three, and the standard deviation should be + // close to zero. + EXPECT_NEAR(3.0f, test_estimator.mean(), 0.01f); + EXPECT_NEAR(0.0f, test_estimator.std_deviation(), 0.01f); +} + +TEST(MeanVarianceEstimatorTests, AlternatingValueTest) { + MeanVarianceEstimator test_estimator; + for (size_t i = 0; i < 20000; i++) { + test_estimator.Update(1.f); + test_estimator.Update(-1.f); + } + // The mean should be close to zero, and the standard deviation should be + // close to one. + EXPECT_NEAR(0.0f, test_estimator.mean(), 0.01f); + EXPECT_NEAR(1.0f, test_estimator.std_deviation(), 0.01f); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.cc new file mode 100644 index 0000000000..3054e98bd3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/moving_max.h" + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Parameter for controlling how fast the estimated maximum decays after the +// previous maximum is no longer valid. With a value of 0.99, the maximum will +// decay to 1% of its former value after 460 updates. +constexpr float kDecayFactor = 0.99f; + +} // namespace + +MovingMax::MovingMax(size_t window_size) : window_size_(window_size) { + RTC_DCHECK_GT(window_size, 0); +} + +MovingMax::~MovingMax() {} + +void MovingMax::Update(float value) { + if (counter_ >= window_size_ - 1) { + max_value_ *= kDecayFactor; + } else { + ++counter_; + } + if (value > max_value_) { + max_value_ = value; + counter_ = 0; + } +} + +float MovingMax::max() const { + return max_value_; +} + +void MovingMax::Clear() { + max_value_ = 0.f; + counter_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.h b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.h new file mode 100644 index 0000000000..f7d8ee8137 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_ + +#include + +namespace webrtc { + +class MovingMax { + public: + explicit MovingMax(size_t window_size); + ~MovingMax(); + + void Update(float value); + float max() const; + // Reset all of the state in this class. + void Clear(); + + private: + float max_value_ = 0.f; + size_t counter_ = 0; + size_t window_size_ = 1; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc new file mode 100644 index 0000000000..9429127a2b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/moving_max.h" + +#include "test/gtest.h" + +namespace webrtc { + +// Test if the maximum is correctly found. +TEST(MovingMaxTests, SimpleTest) { + MovingMax test_moving_max(5); + test_moving_max.Update(1.0f); + test_moving_max.Update(1.1f); + test_moving_max.Update(1.9f); + test_moving_max.Update(1.87f); + test_moving_max.Update(1.89f); + EXPECT_EQ(1.9f, test_moving_max.max()); +} + +// Test if values fall out of the window when expected. +TEST(MovingMaxTests, SlidingWindowTest) { + MovingMax test_moving_max(5); + test_moving_max.Update(1.0f); + test_moving_max.Update(1.9f); + test_moving_max.Update(1.7f); + test_moving_max.Update(1.87f); + test_moving_max.Update(1.89f); + test_moving_max.Update(1.3f); + test_moving_max.Update(1.2f); + EXPECT_LT(test_moving_max.max(), 1.9f); +} + +// Test if Clear() works as expected. +TEST(MovingMaxTests, ClearTest) { + MovingMax test_moving_max(5); + test_moving_max.Update(1.0f); + test_moving_max.Update(1.1f); + test_moving_max.Update(1.9f); + test_moving_max.Update(1.87f); + test_moving_max.Update(1.89f); + EXPECT_EQ(1.9f, test_moving_max.max()); + test_moving_max.Clear(); + EXPECT_EQ(0.f, test_moving_max.max()); +} + +// Test the decay of the estimated maximum. 
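+// With the kDecayFactor of 0.99f defined in moving_max.cc, every update that
+// does not set a new maximum multiplies max() by 0.99, so after n such
+// updates max() = initial * 0.99^n. For the 500 updates below this gives
+// 0.99^500 ~= 0.0066, which is why the final EXPECT_LT(..., 0.01f) holds.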
+TEST(MovingMaxTests, DecayTest) { + MovingMax test_moving_max(1); + test_moving_max.Update(1.0f); + float previous_value = 1.0f; + for (int i = 0; i < 500; i++) { + test_moving_max.Update(0.0f); + EXPECT_LT(test_moving_max.max(), previous_value); + EXPECT_GT(test_moving_max.max(), 0.0f); + previous_value = test_moving_max.max(); + } + EXPECT_LT(test_moving_max.max(), 0.01f); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc new file mode 100644 index 0000000000..8ec9fe9f0b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Parameter controlling the adaptation speed. +constexpr float kAlpha = 0.001f; + +} // namespace + +void NormalizedCovarianceEstimator::Update(float x, + float x_mean, + float x_sigma, + float y, + float y_mean, + float y_sigma) { + covariance_ = + (1.f - kAlpha) * covariance_ + kAlpha * (x - x_mean) * (y - y_mean); + normalized_cross_correlation_ = covariance_ / (x_sigma * y_sigma + .0001f); + RTC_DCHECK(isfinite(covariance_)); + RTC_DCHECK(isfinite(normalized_cross_correlation_)); +} + +void NormalizedCovarianceEstimator::Clear() { + covariance_ = 0.f; + normalized_cross_correlation_ = 0.f; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h new file mode 100644 index 0000000000..e3c36d88ba --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_NORMALIZED_COVARIANCE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_NORMALIZED_COVARIANCE_ESTIMATOR_H_ + +namespace webrtc { + +// This class iteratively estimates the normalized covariance between two +// signals. +class NormalizedCovarianceEstimator { + public: + void Update(float x, + float x_mean, + float x_var, + float y, + float y_mean, + float y_var); + // This function returns an estimate of the Pearson product-moment correlation + // coefficient of the two signals. + float normalized_cross_correlation() const { + return normalized_cross_correlation_; + } + float covariance() const { return covariance_; } + // This function resets the estimated values to zero. 
+  void Clear();
+
+ private:
+  float normalized_cross_correlation_ = 0.f;
+  // Estimate of the covariance value.
+  float covariance_ = 0.f;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_NORMALIZED_COVARIANCE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc
new file mode 100644
index 0000000000..89fb9383f6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc
@@ -0,0 +1,41 @@
+
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(NormalizedCovarianceEstimatorTests, IdenticalSignalTest) {
+  NormalizedCovarianceEstimator test_estimator;
+  for (size_t i = 0; i < 10000; i++) {
+    test_estimator.Update(1.f, 0.f, 1.f, 1.f, 0.f, 1.f);
+    test_estimator.Update(-1.f, 0.f, 1.f, -1.f, 0.f, 1.f);
+  }
+  // A normalized covariance value close to 1 is expected.
+  EXPECT_NEAR(1.f, test_estimator.normalized_cross_correlation(), 0.01f);
+  test_estimator.Clear();
+  EXPECT_EQ(0.f, test_estimator.normalized_cross_correlation());
+}
+
+TEST(NormalizedCovarianceEstimatorTests, OppositeSignalTest) {
+  NormalizedCovarianceEstimator test_estimator;
+  // Insert the same value many times.
+  for (size_t i = 0; i < 10000; i++) {
+    test_estimator.Update(1.f, 0.f, 1.f, -1.f, 0.f, 1.f);
+    test_estimator.Update(-1.f, 0.f, 1.f, 1.f, 0.f, 1.f);
+  }
+  // A normalized covariance value close to -1 is expected.
+  EXPECT_NEAR(-1.f, test_estimator.normalized_cross_correlation(), 0.01f);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/g3doc/audio_processing_module.md b/third_party/libwebrtc/modules/audio_processing/g3doc/audio_processing_module.md
new file mode 100644
index 0000000000..a77f62fbaf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/g3doc/audio_processing_module.md
@@ -0,0 +1,26 @@
+
+
+# Audio Processing Module (APM)
+
+## Overview
+
+The APM is responsible for applying speech enhancement effects to the
+microphone signal. These effects are required for VoIP calling and some
+examples include echo cancellation (AEC), noise suppression (NS) and
+automatic gain control (AGC).
+
+The API for APM resides in [`/modules/audio_processing/include`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_processing/include).
+APM is created using the [`AudioProcessingBuilder`](https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/audio_processing/include/audio_processing.h)
+builder, which allows it to be customized and configured.
+
+Some specific aspects of APM:
+* APM is fully thread-safe in that it can be accessed concurrently from
+  different threads.
+* APM handles any input sample rate < 384 kHz and achieves this by
+  automatic reconfiguration whenever a new sample format is observed.
+* APM handles any number of microphone channels and loudspeaker channels, with + the same automatic reconfiguration as for the sample rates. + + +APM can either be used as part of the WebRTC native pipeline, or standalone. diff --git a/third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc b/third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc new file mode 100644 index 0000000000..edc49d1401 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/gain_control_impl.cc @@ -0,0 +1,373 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/gain_control_impl.h" + +#include + +#include "absl/types/optional.h" +#include "modules/audio_processing/agc/legacy/gain_control.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +typedef void Handle; + +namespace { +int16_t MapSetting(GainControl::Mode mode) { + switch (mode) { + case GainControl::kAdaptiveAnalog: + return kAgcModeAdaptiveAnalog; + case GainControl::kAdaptiveDigital: + return kAgcModeAdaptiveDigital; + case GainControl::kFixedDigital: + return kAgcModeFixedDigital; + } + RTC_DCHECK_NOTREACHED(); + return -1; +} + +// Applies the sub-frame `gains` to all the bands in `out` and clamps the output +// in the signed 16 bit range. 
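+// The 11 `gains` entries define 10 linear interpolation segments of
+// kNumSubSections = 16 samples each, i.e. exactly 10 * 16 = 160 samples per
+// band, matching the 160-sample split-band frames used throughout this patch
+// (10 ms at 16 kHz).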
+void ApplyDigitalGain(const int32_t gains[11], + size_t num_bands, + float* const* out) { + constexpr float kScaling = 1.f / 65536.f; + constexpr int kNumSubSections = 16; + constexpr float kOneByNumSubSections = 1.f / kNumSubSections; + + float gains_scaled[11]; + for (int k = 0; k < 11; ++k) { + gains_scaled[k] = gains[k] * kScaling; + } + + for (size_t b = 0; b < num_bands; ++b) { + float* out_band = out[b]; + for (int k = 0, sample = 0; k < 10; ++k) { + const float delta = + (gains_scaled[k + 1] - gains_scaled[k]) * kOneByNumSubSections; + float gain = gains_scaled[k]; + for (int n = 0; n < kNumSubSections; ++n, ++sample) { + RTC_DCHECK_EQ(k * kNumSubSections + n, sample); + out_band[sample] *= gain; + out_band[sample] = + std::min(32767.f, std::max(-32768.f, out_band[sample])); + gain += delta; + } + } + } +} + +} // namespace + +struct GainControlImpl::MonoAgcState { + MonoAgcState() { + state = WebRtcAgc_Create(); + RTC_CHECK(state); + } + + ~MonoAgcState() { + RTC_DCHECK(state); + WebRtcAgc_Free(state); + } + + MonoAgcState(const MonoAgcState&) = delete; + MonoAgcState& operator=(const MonoAgcState&) = delete; + int32_t gains[11]; + Handle* state; +}; + +int GainControlImpl::instance_counter_ = 0; + +GainControlImpl::GainControlImpl() + : data_dumper_(new ApmDataDumper(instance_counter_)), + mode_(kAdaptiveAnalog), + minimum_capture_level_(0), + maximum_capture_level_(255), + limiter_enabled_(true), + target_level_dbfs_(3), + compression_gain_db_(9), + analog_capture_level_(0), + was_analog_level_set_(false), + stream_is_saturated_(false) {} + +GainControlImpl::~GainControlImpl() = default; + +void GainControlImpl::ProcessRenderAudio( + rtc::ArrayView packed_render_audio) { + for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) { + WebRtcAgc_AddFarend(mono_agcs_[ch]->state, packed_render_audio.data(), + packed_render_audio.size()); + } +} + +void GainControlImpl::PackRenderAudioBuffer( + const AudioBuffer& audio, + std::vector* packed_buffer) { + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, audio.num_frames_per_band()); + std::array + mixed_16_kHz_render_data; + rtc::ArrayView mixed_16_kHz_render( + mixed_16_kHz_render_data.data(), audio.num_frames_per_band()); + if (audio.num_channels() == 1) { + FloatS16ToS16(audio.split_bands_const(0)[kBand0To8kHz], + audio.num_frames_per_band(), mixed_16_kHz_render_data.data()); + } else { + const int num_channels = static_cast(audio.num_channels()); + for (size_t i = 0; i < audio.num_frames_per_band(); ++i) { + int32_t sum = 0; + for (int ch = 0; ch < num_channels; ++ch) { + sum += FloatS16ToS16(audio.split_channels_const(kBand0To8kHz)[ch][i]); + } + mixed_16_kHz_render_data[i] = sum / num_channels; + } + } + + packed_buffer->clear(); + packed_buffer->insert( + packed_buffer->end(), mixed_16_kHz_render.data(), + (mixed_16_kHz_render.data() + audio.num_frames_per_band())); +} + +int GainControlImpl::AnalyzeCaptureAudio(const AudioBuffer& audio) { + RTC_DCHECK(num_proc_channels_); + RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength, audio.num_frames_per_band()); + RTC_DCHECK_EQ(audio.num_channels(), *num_proc_channels_); + RTC_DCHECK_LE(*num_proc_channels_, mono_agcs_.size()); + + int16_t split_band_data[AudioBuffer::kMaxNumBands] + [AudioBuffer::kMaxSplitFrameLength]; + int16_t* split_bands[AudioBuffer::kMaxNumBands] = { + split_band_data[0], split_band_data[1], split_band_data[2]}; + + if (mode_ == kAdaptiveAnalog) { + for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) { + capture_levels_[ch] = analog_capture_level_; + + 
      audio.ExportSplitChannelData(ch, split_bands);
+
+      int err =
+          WebRtcAgc_AddMic(mono_agcs_[ch]->state, split_bands,
+                           audio.num_bands(), audio.num_frames_per_band());
+
+      if (err != AudioProcessing::kNoError) {
+        return AudioProcessing::kUnspecifiedError;
+      }
+    }
+  } else if (mode_ == kAdaptiveDigital) {
+    for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+      int32_t capture_level_out = 0;
+
+      audio.ExportSplitChannelData(ch, split_bands);
+
+      int err =
+          WebRtcAgc_VirtualMic(mono_agcs_[ch]->state, split_bands,
+                               audio.num_bands(), audio.num_frames_per_band(),
+                               analog_capture_level_, &capture_level_out);
+
+      capture_levels_[ch] = capture_level_out;
+
+      if (err != AudioProcessing::kNoError) {
+        return AudioProcessing::kUnspecifiedError;
+      }
+    }
+  }
+
+  return AudioProcessing::kNoError;
+}
+
+int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio,
+                                         bool stream_has_echo) {
+  if (mode_ == kAdaptiveAnalog && !was_analog_level_set_) {
+    return AudioProcessing::kStreamParameterNotSetError;
+  }
+
+  RTC_DCHECK(num_proc_channels_);
+  RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
+                audio->num_frames_per_band());
+  RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_);
+
+  stream_is_saturated_ = false;
+  bool error_reported = false;
+  for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+    int16_t split_band_data[AudioBuffer::kMaxNumBands]
+                           [AudioBuffer::kMaxSplitFrameLength];
+    int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
+        split_band_data[0], split_band_data[1], split_band_data[2]};
+    audio->ExportSplitChannelData(ch, split_bands);
+
+    // The call to stream_has_echo() is ok from a deadlock perspective
+    // as the capture lock is already held.
+    int32_t new_capture_level = 0;
+    uint8_t saturation_warning = 0;
+    int err_analyze = WebRtcAgc_Analyze(
+        mono_agcs_[ch]->state, split_bands, audio->num_bands(),
+        audio->num_frames_per_band(), capture_levels_[ch], &new_capture_level,
+        stream_has_echo, &saturation_warning, mono_agcs_[ch]->gains);
+    capture_levels_[ch] = new_capture_level;
+
+    error_reported =
+        error_reported || err_analyze != AudioProcessing::kNoError;
+
+    stream_is_saturated_ = stream_is_saturated_ || saturation_warning == 1;
+  }
+
+  // Choose the minimum gain for application.
+  size_t index_to_apply = 0;
+  for (size_t ch = 1; ch < mono_agcs_.size(); ++ch) {
+    if (mono_agcs_[index_to_apply]->gains[10] < mono_agcs_[ch]->gains[10]) {
+      index_to_apply = ch;
+    }
+  }
+
+  for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+    ApplyDigitalGain(mono_agcs_[index_to_apply]->gains, audio->num_bands(),
+                     audio->split_bands(ch));
+  }
+
+  RTC_DCHECK_LT(0ul, *num_proc_channels_);
+  if (mode_ == kAdaptiveAnalog) {
+    // Take the analog level to be the minimum across all channels.
+    analog_capture_level_ = capture_levels_[0];
+    for (size_t ch = 1; ch < mono_agcs_.size(); ++ch) {
+      analog_capture_level_ =
+          std::min(analog_capture_level_, capture_levels_[ch]);
+    }
+  }
+
+  if (error_reported) {
+    return AudioProcessing::kUnspecifiedError;
+  }
+
+  was_analog_level_set_ = false;
+
+  return AudioProcessing::kNoError;
+}
+
+// TODO(ajm): ensure this is called under kAdaptiveAnalog.
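+// Note on the handshake with ProcessCaptureAudio() above: the level set here
+// is consumed by the next ProcessCaptureAudio() call, which resets
+// was_analog_level_set_ on success, so in kAdaptiveAnalog mode a missing
+// set_stream_analog_level() call surfaces as kStreamParameterNotSetError.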
+int GainControlImpl::set_stream_analog_level(int level) { + data_dumper_->DumpRaw("gain_control_set_stream_analog_level", 1, &level); + + was_analog_level_set_ = true; + if (level < minimum_capture_level_ || level > maximum_capture_level_) { + return AudioProcessing::kBadParameterError; + } + analog_capture_level_ = level; + + return AudioProcessing::kNoError; +} + +int GainControlImpl::stream_analog_level() const { + data_dumper_->DumpRaw("gain_control_stream_analog_level", 1, + &analog_capture_level_); + return analog_capture_level_; +} + +int GainControlImpl::set_mode(Mode mode) { + if (MapSetting(mode) == -1) { + return AudioProcessing::kBadParameterError; + } + + mode_ = mode; + RTC_DCHECK(num_proc_channels_); + RTC_DCHECK(sample_rate_hz_); + Initialize(*num_proc_channels_, *sample_rate_hz_); + return AudioProcessing::kNoError; +} + + +int GainControlImpl::set_analog_level_limits(int minimum, int maximum) { + if (minimum < 0 || maximum > 65535 || maximum < minimum) { + return AudioProcessing::kBadParameterError; + } + + minimum_capture_level_ = minimum; + maximum_capture_level_ = maximum; + + RTC_DCHECK(num_proc_channels_); + RTC_DCHECK(sample_rate_hz_); + Initialize(*num_proc_channels_, *sample_rate_hz_); + return AudioProcessing::kNoError; +} + + +int GainControlImpl::set_target_level_dbfs(int level) { + if (level > 31 || level < 0) { + return AudioProcessing::kBadParameterError; + } + target_level_dbfs_ = level; + return Configure(); +} + +int GainControlImpl::set_compression_gain_db(int gain) { + if (gain < 0 || gain > 90) { + RTC_LOG(LS_ERROR) << "set_compression_gain_db(" << gain << ") failed."; + return AudioProcessing::kBadParameterError; + } + compression_gain_db_ = gain; + return Configure(); +} + +int GainControlImpl::enable_limiter(bool enable) { + limiter_enabled_ = enable; + return Configure(); +} + +void GainControlImpl::Initialize(size_t num_proc_channels, int sample_rate_hz) { + data_dumper_->InitiateNewSetOfRecordings(); + + RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000 || + sample_rate_hz == 48000); + + num_proc_channels_ = num_proc_channels; + sample_rate_hz_ = sample_rate_hz; + + mono_agcs_.resize(*num_proc_channels_); + capture_levels_.resize(*num_proc_channels_); + for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) { + if (!mono_agcs_[ch]) { + mono_agcs_[ch].reset(new MonoAgcState()); + } + + int error = WebRtcAgc_Init(mono_agcs_[ch]->state, minimum_capture_level_, + maximum_capture_level_, MapSetting(mode_), + *sample_rate_hz_); + RTC_DCHECK_EQ(error, 0); + capture_levels_[ch] = analog_capture_level_; + } + + Configure(); +} + +int GainControlImpl::Configure() { + WebRtcAgcConfig config; + // TODO(ajm): Flip the sign here (since AGC expects a positive value) if we + // change the interface. 
+  // RTC_DCHECK_LE(target_level_dbfs_, 0);
+  // config.targetLevelDbfs = static_cast<int16_t>(-target_level_dbfs_);
+  config.targetLevelDbfs = static_cast<int16_t>(target_level_dbfs_);
+  config.compressionGaindB = static_cast<int16_t>(compression_gain_db_);
+  config.limiterEnable = limiter_enabled_;
+
+  int error = AudioProcessing::kNoError;
+  for (size_t ch = 0; ch < mono_agcs_.size(); ++ch) {
+    int error_ch = WebRtcAgc_set_config(mono_agcs_[ch]->state, config);
+    if (error_ch != AudioProcessing::kNoError) {
+      error = error_ch;
+    }
+  }
+  return error;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/gain_control_impl.h b/third_party/libwebrtc/modules/audio_processing/gain_control_impl.h
new file mode 100644
index 0000000000..8aea8f2e95
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/gain_control_impl.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc/gain_control.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioBuffer;
+
+class GainControlImpl : public GainControl {
+ public:
+  GainControlImpl();
+  GainControlImpl(const GainControlImpl&) = delete;
+  GainControlImpl& operator=(const GainControlImpl&) = delete;
+
+  ~GainControlImpl() override;
+
+  void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio);
+  int AnalyzeCaptureAudio(const AudioBuffer& audio);
+  int ProcessCaptureAudio(AudioBuffer* audio, bool stream_has_echo);
+
+  void Initialize(size_t num_proc_channels, int sample_rate_hz);
+
+  static void PackRenderAudioBuffer(const AudioBuffer& audio,
+                                    std::vector<int16_t>* packed_buffer);
+
+  // GainControl implementation.
+  int stream_analog_level() const override;
+  bool is_limiter_enabled() const override { return limiter_enabled_; }
+  Mode mode() const override { return mode_; }
+  int set_mode(Mode mode) override;
+  int compression_gain_db() const override { return compression_gain_db_; }
+  int set_analog_level_limits(int minimum, int maximum) override;
+  int set_compression_gain_db(int gain) override;
+  int set_target_level_dbfs(int level) override;
+  int enable_limiter(bool enable) override;
+  int set_stream_analog_level(int level) override;
+
+ private:
+  struct MonoAgcState;
+
+  // GainControl implementation.
+  int target_level_dbfs() const override { return target_level_dbfs_; }
+  int analog_level_minimum() const override { return minimum_capture_level_; }
+  int analog_level_maximum() const override { return maximum_capture_level_; }
+  bool stream_is_saturated() const override { return stream_is_saturated_; }
+
+  int Configure();
+
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+
+  Mode mode_;
+  int minimum_capture_level_;
+  int maximum_capture_level_;
+  bool limiter_enabled_;
+  int target_level_dbfs_;
+  int compression_gain_db_;
+  int analog_capture_level_ = 0;
+  bool was_analog_level_set_;
+  bool stream_is_saturated_;
+
+  std::vector<std::unique_ptr<MonoAgcState>> mono_agcs_;
+  std::vector<int> capture_levels_;
+
+  absl::optional<size_t> num_proc_channels_;
+  absl::optional<int> sample_rate_hz_;
+
+  static int instance_counter_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/gain_control_unittest.cc b/third_party/libwebrtc/modules/audio_processing/gain_control_unittest.cc
new file mode 100644
index 0000000000..1662dc506f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/gain_control_unittest.cc
@@ -0,0 +1,393 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <stddef.h>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/gain_control_impl.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+const int kNumFramesToProcess = 100;
+
+void ProcessOneFrame(int sample_rate_hz,
+                     AudioBuffer* render_audio_buffer,
+                     AudioBuffer* capture_audio_buffer,
+                     GainControlImpl* gain_controller) {
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    render_audio_buffer->SplitIntoFrequencyBands();
+    capture_audio_buffer->SplitIntoFrequencyBands();
+  }
+
+  std::vector<int16_t> render_audio;
+  GainControlImpl::PackRenderAudioBuffer(*render_audio_buffer, &render_audio);
+  gain_controller->ProcessRenderAudio(render_audio);
+  gain_controller->AnalyzeCaptureAudio(*capture_audio_buffer);
+  gain_controller->ProcessCaptureAudio(capture_audio_buffer, false);
+
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    capture_audio_buffer->MergeFrequencyBands();
+  }
+}
+
+void SetupComponent(int sample_rate_hz,
+                    GainControl::Mode mode,
+                    int target_level_dbfs,
+                    int stream_analog_level,
+                    int compression_gain_db,
+                    bool enable_limiter,
+                    int analog_level_min,
+                    int analog_level_max,
+                    GainControlImpl* gain_controller) {
+  gain_controller->Initialize(1, sample_rate_hz);
+  GainControl* gc = static_cast<GainControl*>(gain_controller);
+  gc->set_mode(mode);
+  gc->set_stream_analog_level(stream_analog_level);
+  gc->set_target_level_dbfs(target_level_dbfs);
+  gc->set_compression_gain_db(compression_gain_db);
+  gc->enable_limiter(enable_limiter);
+  gc->set_analog_level_limits(analog_level_min, analog_level_max);
+}
+
+void RunBitExactnessTest(int sample_rate_hz,
+                         size_t num_channels,
+                         GainControl::Mode mode,
+                         int target_level_dbfs,
+                         int stream_analog_level,
+                         int compression_gain_db,
+                         bool enable_limiter,
+                         int analog_level_min,
+                         int analog_level_max,
+                         int achieved_stream_analog_level_reference,
+                         rtc::ArrayView<const float> output_reference) {
+  GainControlImpl gain_controller;
+  SetupComponent(sample_rate_hz, mode, target_level_dbfs, stream_analog_level,
+                 compression_gain_db, enable_limiter, analog_level_min,
+                 analog_level_max, &gain_controller);
+
+  const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
+  const StreamConfig render_config(sample_rate_hz, num_channels);
+  AudioBuffer render_buffer(
+      render_config.sample_rate_hz(), render_config.num_channels(),
+      render_config.sample_rate_hz(), 1, render_config.sample_rate_hz(), 1);
+  test::InputAudioFile render_file(
+      test::GetApmRenderTestVectorFileName(sample_rate_hz));
+  std::vector<float> render_input(samples_per_channel * num_channels);
+
+  const StreamConfig capture_config(sample_rate_hz, num_channels);
+  AudioBuffer capture_buffer(
+      capture_config.sample_rate_hz(), capture_config.num_channels(),
+      capture_config.sample_rate_hz(), 1, capture_config.sample_rate_hz(), 1);
+  test::InputAudioFile capture_file(
+      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+  std::vector<float> capture_input(samples_per_channel * num_channels);
+
+  for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &render_file, render_input);
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &capture_file, capture_input);
+
+    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer,
+                    &gain_controller);
+  }
+
+  // Extract and verify the test results.
+  std::vector<float> capture_output;
+  test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
+                                     &capture_output);
+
+  EXPECT_EQ(achieved_stream_analog_level_reference,
+            gain_controller.stream_analog_level());
+
+  // Compare the output with the reference. Only the first values of the
+  // output from the last processed frame are compared, so that all preceding
+  // frames do not have to be specified as test vectors. Since the algorithm
+  // under test has memory, checking the last frame implicitly also covers the
+  // preceding frames.
+  const float kElementErrorBound = 1.0f / 32768.0f;
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      capture_config.num_frames(), capture_config.num_channels(),
+      output_reference, capture_output, kElementErrorBound));
+}
+
+}  // namespace
+
+// TODO(peah): Activate all these tests for ARM and ARM64 once the issue on
+// the Chromium ARM and ARM64 bots has been identified. This is tracked in the
+// issue https://bugs.chromium.org/p/webrtc/issues/detail?id=5711.
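+
+// Test names below encode the RunBitExactnessTest() arguments (a reading
+// aid): channel count and sample rate, the mode, target level (Tl), initial
+// stream analog level (SL), compression gain in dB (CG), limiter on/off
+// (Lim/NoLim), and the analog level limits (AL<min>_<max>). Each
+// kOutputReference holds the first samples per channel of the last processed
+// frame, and the kElementErrorBound used above equals one int16 LSB (1/32768)
+// in the floating-point domain.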
+ +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.006561f, -0.004608f, -0.002899f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Stereo16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Stereo16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.027313f, -0.015900f, -0.028107f, + -0.027313f, -0.015900f, -0.028107f}; + RunBitExactnessTest(16000, 2, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono32kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono32kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.010162f, -0.009155f, -0.008301f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono48kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono48kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.010162f, -0.009155f, -0.008301f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.003967f, -0.002777f, -0.001770f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Stereo16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Stereo16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.015411f, -0.008972f, -0.015839f, + -0.015411f, -0.008972f, -0.015839f}; + RunBitExactnessTest(16000, 2, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || 
defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono32kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono32kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.006134f, -0.005524f, -0.005005f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono48kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono48kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.006134f, -0.005524f, -0.005005}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.011749f, -0.008270f, -0.005219f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Stereo16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Stereo16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.048896f, -0.028479f, -0.050345f, + -0.048896f, -0.028479f, -0.050345f}; + RunBitExactnessTest(16000, 2, GainControl::Mode::kFixedDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono32kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono32kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.018158f, -0.016357f, -0.014832f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono48kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono48kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.018158f, -0.016357f, -0.014832f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + 
Mono16kHz_AdaptiveAnalog_Tl10_SL10_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveAnalog_Tl10_SL10_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 12; + const float kOutputReference[] = {-0.006561f, -0.004608f, -0.002899f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 10, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveAnalog_Tl10_SL100_CG5_Lim_AL70_80) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveAnalog_Tl10_SL100_CG5_Lim_AL70_80) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.003998f, -0.002808f, -0.001770f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 100, 5, + true, 70, 80, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl10_SL100_CG5_NoLim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl10_SL100_CG5_NoLim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.004028f, -0.002838f, -0.001770f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 10, 100, 5, + false, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl40_SL100_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl40_SL100_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.008728f, -0.006134f, -0.003845f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 40, 100, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl10_SL100_CG30_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl10_SL100_CG30_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.005859f, -0.004120f, -0.002594f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 10, 100, + 30, true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/gain_controller2.cc b/third_party/libwebrtc/modules/audio_processing/gain_controller2.cc new file mode 100644 index 0000000000..dd3521268d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/gain_controller2.cc @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/gain_controller2.h" + +#include +#include + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_frame_view.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { +namespace { + +using Agc2Config = AudioProcessing::Config::GainController2; +using InputVolumeControllerConfig = InputVolumeController::Config; + +constexpr int kLogLimiterStatsPeriodMs = 30'000; +constexpr int kFrameLengthMs = 10; +constexpr int kLogLimiterStatsPeriodNumFrames = + kLogLimiterStatsPeriodMs / kFrameLengthMs; + +// Detects the available CPU features and applies any kill-switches. +AvailableCpuFeatures GetAllowedCpuFeatures() { + AvailableCpuFeatures features = GetAvailableCpuFeatures(); + if (field_trial::IsEnabled("WebRTC-Agc2SimdSse2KillSwitch")) { + features.sse2 = false; + } + if (field_trial::IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch")) { + features.avx2 = false; + } + if (field_trial::IsEnabled("WebRTC-Agc2SimdNeonKillSwitch")) { + features.neon = false; + } + return features; +} + +// Peak and RMS audio levels in dBFS. +struct AudioLevels { + float peak_dbfs; + float rms_dbfs; +}; + +// Speech level info. +struct SpeechLevel { + bool is_confident; + float rms_dbfs; +}; + +// Computes the audio levels for the first channel in `frame`. +AudioLevels ComputeAudioLevels(AudioFrameView frame, + ApmDataDumper& data_dumper) { + float peak = 0.0f; + float rms = 0.0f; + for (const auto& x : frame.channel(0)) { + peak = std::max(std::fabs(x), peak); + rms += x * x; + } + AudioLevels levels{ + FloatS16ToDbfs(peak), + FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))}; + data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs); + data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs); + return levels; +} + +} // namespace + +std::atomic GainController2::instance_count_(0); + +GainController2::GainController2( + const Agc2Config& config, + const InputVolumeControllerConfig& input_volume_controller_config, + int sample_rate_hz, + int num_channels, + bool use_internal_vad) + : cpu_features_(GetAllowedCpuFeatures()), + data_dumper_(instance_count_.fetch_add(1) + 1), + fixed_gain_applier_( + /*hard_clip_samples=*/false, + /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)), + limiter_(sample_rate_hz, &data_dumper_, /*histogram_name_prefix=*/"Agc2"), + calls_since_last_limiter_log_(0) { + RTC_DCHECK(Validate(config)); + data_dumper_.InitiateNewSetOfRecordings(); + + if (config.input_volume_controller.enabled || + config.adaptive_digital.enabled) { + // Create dependencies. + speech_level_estimator_ = std::make_unique( + &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold); + if (use_internal_vad) + vad_ = std::make_unique( + kVadResetPeriodMs, cpu_features_, sample_rate_hz); + } + + if (config.input_volume_controller.enabled) { + // Create controller. + input_volume_controller_ = std::make_unique( + num_channels, input_volume_controller_config); + // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method. + input_volume_controller_->Initialize(); + } + + if (config.adaptive_digital.enabled) { + // Create dependencies. 
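+    // The adaptive digital path combines several estimators: the noise level
+    // estimator tracks the noise floor, the saturation protector derives a
+    // headroom estimate from speech peaks, and the speech level estimator
+    // created above supplies the speech RMS level; `Process()` feeds their
+    // outputs to the adaptive digital controller.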
+ noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_); + saturation_protector_ = CreateSaturationProtector( + kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold, + &data_dumper_); + // Create controller. + adaptive_digital_controller_ = + std::make_unique( + &data_dumper_, config.adaptive_digital, + kAdjacentSpeechFramesThreshold); + } +} + +GainController2::~GainController2() = default; + +// TODO(webrtc:7494): Pass the flag also to the other components. +void GainController2::SetCaptureOutputUsed(bool capture_output_used) { + if (input_volume_controller_) { + input_volume_controller_->HandleCaptureOutputUsedChange( + capture_output_used); + } +} + +void GainController2::SetFixedGainDb(float gain_db) { + const float gain_factor = DbToRatio(gain_db); + if (fixed_gain_applier_.GetGainFactor() != gain_factor) { + // Reset the limiter to quickly react on abrupt level changes caused by + // large changes of the fixed gain. + limiter_.Reset(); + } + fixed_gain_applier_.SetGainFactor(gain_factor); +} + +void GainController2::Analyze(int applied_input_volume, + const AudioBuffer& audio_buffer) { + recommended_input_volume_ = absl::nullopt; + + RTC_DCHECK_GE(applied_input_volume, 0); + RTC_DCHECK_LE(applied_input_volume, 255); + + if (input_volume_controller_) { + input_volume_controller_->AnalyzeInputAudio(applied_input_volume, + audio_buffer); + } +} + +void GainController2::Process(absl::optional speech_probability, + bool input_volume_changed, + AudioBuffer* audio) { + recommended_input_volume_ = absl::nullopt; + + data_dumper_.DumpRaw("agc2_applied_input_volume_changed", + input_volume_changed); + if (input_volume_changed) { + // Handle input volume changes. + if (speech_level_estimator_) + speech_level_estimator_->Reset(); + if (saturation_protector_) + saturation_protector_->Reset(); + } + + AudioFrameView float_frame(audio->channels(), audio->num_channels(), + audio->num_frames()); + // Compute speech probability. + if (vad_) { + // When the VAD component runs, `speech_probability` should not be specified + // because APM should not run the same VAD twice (as an APM sub-module and + // internally in AGC2). + RTC_DCHECK(!speech_probability.has_value()); + speech_probability = vad_->Analyze(float_frame); + } + if (speech_probability.has_value()) { + RTC_DCHECK_GE(*speech_probability, 0.0f); + RTC_DCHECK_LE(*speech_probability, 1.0f); + } + // The speech probability may not be defined at this step (e.g., when the + // fixed digital controller alone is enabled). + if (speech_probability.has_value()) + data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability); + + // Compute audio, noise and speech levels. + AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_); + absl::optional noise_rms_dbfs; + if (noise_level_estimator_) { + // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated + // computation in `noise_level_estimator_`. + noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame); + } + absl::optional speech_level; + if (speech_level_estimator_) { + RTC_DCHECK(speech_probability.has_value()); + speech_level_estimator_->Update( + audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability); + speech_level = + SpeechLevel{.is_confident = speech_level_estimator_->is_confident(), + .rms_dbfs = speech_level_estimator_->level_dbfs()}; + } + + // Update the recommended input volume. 
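+  // The recommendation is computed from the speech probability and, when the
+  // speech level estimate is confident, from its RMS level in dBFS; it is
+  // left unset whenever the input volume controller is disabled.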
+ if (input_volume_controller_) { + RTC_DCHECK(speech_level.has_value()); + RTC_DCHECK(speech_probability.has_value()); + if (speech_probability.has_value()) { + recommended_input_volume_ = + input_volume_controller_->RecommendInputVolume( + *speech_probability, + speech_level->is_confident + ? absl::optional(speech_level->rms_dbfs) + : absl::nullopt); + } + } + + if (adaptive_digital_controller_) { + RTC_DCHECK(saturation_protector_); + RTC_DCHECK(speech_probability.has_value()); + RTC_DCHECK(speech_level.has_value()); + saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs, + speech_level->rms_dbfs); + float headroom_db = saturation_protector_->HeadroomDb(); + data_dumper_.DumpRaw("agc2_headroom_db", headroom_db); + float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel()); + data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs); + RTC_DCHECK(noise_rms_dbfs.has_value()); + adaptive_digital_controller_->Process( + /*info=*/{.speech_probability = *speech_probability, + .speech_level_dbfs = speech_level->rms_dbfs, + .speech_level_reliable = speech_level->is_confident, + .noise_rms_dbfs = *noise_rms_dbfs, + .headroom_db = headroom_db, + .limiter_envelope_dbfs = limiter_envelope_dbfs}, + float_frame); + } + + // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated + // computation in `limiter_`. + fixed_gain_applier_.ApplyGain(float_frame); + + limiter_.Process(float_frame); + + // Periodically log limiter stats. + if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) { + calls_since_last_limiter_log_ = 0; + InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats(); + RTC_LOG(LS_INFO) << "[AGC2] limiter stats" + << " | identity: " << stats.look_ups_identity_region + << " | knee: " << stats.look_ups_knee_region + << " | limiter: " << stats.look_ups_limiter_region + << " | saturation: " << stats.look_ups_saturation_region; + } +} + +bool GainController2::Validate( + const AudioProcessing::Config::GainController2& config) { + const auto& fixed = config.fixed_digital; + const auto& adaptive = config.adaptive_digital; + return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f && + adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f && + adaptive.initial_gain_db >= 0.0f && + adaptive.max_gain_change_db_per_second > 0.0f && + adaptive.max_output_noise_level_dbfs <= 0.0f; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/gain_controller2.h b/third_party/libwebrtc/modules/audio_processing/gain_controller2.h new file mode 100644 index 0000000000..43b5828d35 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/gain_controller2.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_GAIN_CONTROLLER2_H_ +#define MODULES_AUDIO_PROCESSING_GAIN_CONTROLLER2_H_ + +#include +#include +#include + +#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/gain_applier.h" +#include "modules/audio_processing/agc2/input_volume_controller.h" +#include "modules/audio_processing/agc2/limiter.h" +#include "modules/audio_processing/agc2/noise_level_estimator.h" +#include "modules/audio_processing/agc2/saturation_protector.h" +#include "modules/audio_processing/agc2/speech_level_estimator.h" +#include "modules/audio_processing/agc2/vad_wrapper.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +class AudioBuffer; + +// Gain Controller 2 aims to automatically adjust levels by acting on the +// microphone gain and/or applying digital gain. +class GainController2 { + public: + // Ctor. If `use_internal_vad` is true, an internal voice activity + // detector is used for digital adaptive gain. + GainController2( + const AudioProcessing::Config::GainController2& config, + const InputVolumeController::Config& input_volume_controller_config, + int sample_rate_hz, + int num_channels, + bool use_internal_vad); + GainController2(const GainController2&) = delete; + GainController2& operator=(const GainController2&) = delete; + ~GainController2(); + + // Sets the fixed digital gain. + void SetFixedGainDb(float gain_db); + + // Updates the input volume controller about whether the capture output is + // used or not. + void SetCaptureOutputUsed(bool capture_output_used); + + // Analyzes `audio_buffer` before `Process()` is called so that the analysis + // can be performed before digital processing operations take place (e.g., + // echo cancellation). The analysis consists of input clipping detection and + // prediction (if enabled). The value of `applied_input_volume` is limited to + // [0, 255]. + void Analyze(int applied_input_volume, const AudioBuffer& audio_buffer); + + // Updates the recommended input volume, applies the adaptive digital and the + // fixed digital gains and runs a limiter on `audio`. + // When the internal VAD is not used, `speech_probability` should be specified + // and in the [0, 1] range. Otherwise ignores `speech_probability` and + // computes the speech probability via `vad_`. + // Handles input volume changes; if the caller cannot determine whether an + // input volume change occurred, set `input_volume_changed` to false. + void Process(absl::optional speech_probability, + bool input_volume_changed, + AudioBuffer* audio); + + static bool Validate(const AudioProcessing::Config::GainController2& config); + + AvailableCpuFeatures GetCpuFeatures() const { return cpu_features_; } + + absl::optional recommended_input_volume() const { + return recommended_input_volume_; + } + + private: + static std::atomic instance_count_; + const AvailableCpuFeatures cpu_features_; + ApmDataDumper data_dumper_; + + GainApplier fixed_gain_applier_; + std::unique_ptr noise_level_estimator_; + std::unique_ptr vad_; + std::unique_ptr speech_level_estimator_; + std::unique_ptr input_volume_controller_; + // TODO(bugs.webrtc.org/7494): Rename to `CrestFactorEstimator`. 
+ std::unique_ptr saturation_protector_; + std::unique_ptr adaptive_digital_controller_; + Limiter limiter_; + + int calls_since_last_limiter_log_; + + // TODO(bugs.webrtc.org/7494): Remove intermediate storing at this level once + // APM refactoring is completed. + // Recommended input volume from `InputVolumecontroller`. Non-empty after + // `Process()` if input volume controller is enabled and + // `InputVolumeController::Process()` has returned a non-empty value. + absl::optional recommended_input_volume_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_GAIN_CONTROLLER2_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/gain_controller2_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/gain_controller2_gn/moz.build new file mode 100644 index 0000000000..fcbe873d27 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/gain_controller2_gn/moz.build @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/gain_controller2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + 
+ DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("gain_controller2_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/gain_controller2_unittest.cc b/third_party/libwebrtc/modules/audio_processing/gain_controller2_unittest.cc new file mode 100644 index 0000000000..5023bab617 --- /dev/null 
+++ b/third_party/libwebrtc/modules/audio_processing/gain_controller2_unittest.cc @@ -0,0 +1,615 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/gain_controller2.h" + +#include +#include +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/agc2_testing_common.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "rtc_base/checks.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { +namespace { + +using ::testing::Eq; +using ::testing::Optional; + +using Agc2Config = AudioProcessing::Config::GainController2; +using InputVolumeControllerConfig = InputVolumeController::Config; + +// Sets all the samples in `ab` to `value`. +void SetAudioBufferSamples(float value, AudioBuffer& ab) { + for (size_t k = 0; k < ab.num_channels(); ++k) { + std::fill(ab.channels()[k], ab.channels()[k] + ab.num_frames(), value); + } +} + +float RunAgc2WithConstantInput(GainController2& agc2, + float input_level, + int num_frames, + int sample_rate_hz, + int num_channels = 1, + int applied_initial_volume = 0) { + const int num_samples = rtc::CheckedDivExact(sample_rate_hz, 100); + AudioBuffer ab(sample_rate_hz, num_channels, sample_rate_hz, num_channels, + sample_rate_hz, num_channels); + + // Give time to the level estimator to converge. + for (int i = 0; i < num_frames + 1; ++i) { + SetAudioBufferSamples(input_level, ab); + const auto applied_volume = agc2.recommended_input_volume(); + agc2.Analyze(applied_volume.value_or(applied_initial_volume), ab); + + agc2.Process(/*speech_probability=*/absl::nullopt, + /*input_volume_changed=*/false, &ab); + } + + // Return the last sample from the last processed frame. + return ab.channels()[0][num_samples - 1]; +} + +std::unique_ptr CreateAgc2FixedDigitalMode( + float fixed_gain_db, + int sample_rate_hz) { + Agc2Config config; + config.adaptive_digital.enabled = false; + config.fixed_digital.gain_db = fixed_gain_db; + EXPECT_TRUE(GainController2::Validate(config)); + return std::make_unique( + config, InputVolumeControllerConfig{}, sample_rate_hz, + /*num_channels=*/1, + /*use_internal_vad=*/true); +} + +constexpr InputVolumeControllerConfig kTestInputVolumeControllerConfig{ + .clipped_level_min = 20, + .clipped_level_step = 30, + .clipped_ratio_threshold = 0.4, + .clipped_wait_frames = 50, + .enable_clipping_predictor = true, + .target_range_max_dbfs = -6, + .target_range_min_dbfs = -70, + .update_input_volume_wait_frames = 100, + .speech_probability_threshold = 0.9, + .speech_ratio_threshold = 1, +}; + +} // namespace + +TEST(GainController2, CheckDefaultConfig) { + Agc2Config config; + EXPECT_TRUE(GainController2::Validate(config)); +} + +TEST(GainController2, CheckFixedDigitalConfig) { + Agc2Config config; + // Attenuation is not allowed. + config.fixed_digital.gain_db = -5.0f; + EXPECT_FALSE(GainController2::Validate(config)); + // No gain is allowed. 
+ config.fixed_digital.gain_db = 0.0f; + EXPECT_TRUE(GainController2::Validate(config)); + // Positive gain is allowed. + config.fixed_digital.gain_db = 15.0f; + EXPECT_TRUE(GainController2::Validate(config)); +} + +TEST(GainController2, CheckHeadroomDb) { + Agc2Config config; + config.adaptive_digital.headroom_db = -1.0f; + EXPECT_FALSE(GainController2::Validate(config)); + config.adaptive_digital.headroom_db = 0.0f; + EXPECT_TRUE(GainController2::Validate(config)); + config.adaptive_digital.headroom_db = 5.0f; + EXPECT_TRUE(GainController2::Validate(config)); +} + +TEST(GainController2, CheckMaxGainDb) { + Agc2Config config; + config.adaptive_digital.max_gain_db = -1.0f; + EXPECT_FALSE(GainController2::Validate(config)); + config.adaptive_digital.max_gain_db = 0.0f; + EXPECT_FALSE(GainController2::Validate(config)); + config.adaptive_digital.max_gain_db = 5.0f; + EXPECT_TRUE(GainController2::Validate(config)); +} + +TEST(GainController2, CheckInitialGainDb) { + Agc2Config config; + config.adaptive_digital.initial_gain_db = -1.0f; + EXPECT_FALSE(GainController2::Validate(config)); + config.adaptive_digital.initial_gain_db = 0.0f; + EXPECT_TRUE(GainController2::Validate(config)); + config.adaptive_digital.initial_gain_db = 5.0f; + EXPECT_TRUE(GainController2::Validate(config)); +} + +TEST(GainController2, CheckAdaptiveDigitalMaxGainChangeSpeedConfig) { + Agc2Config config; + config.adaptive_digital.max_gain_change_db_per_second = -1.0f; + EXPECT_FALSE(GainController2::Validate(config)); + config.adaptive_digital.max_gain_change_db_per_second = 0.0f; + EXPECT_FALSE(GainController2::Validate(config)); + config.adaptive_digital.max_gain_change_db_per_second = 5.0f; + EXPECT_TRUE(GainController2::Validate(config)); +} + +TEST(GainController2, CheckAdaptiveDigitalMaxOutputNoiseLevelConfig) { + Agc2Config config; + config.adaptive_digital.max_output_noise_level_dbfs = 5.0f; + EXPECT_FALSE(GainController2::Validate(config)); + config.adaptive_digital.max_output_noise_level_dbfs = 0.0f; + EXPECT_TRUE(GainController2::Validate(config)); + config.adaptive_digital.max_output_noise_level_dbfs = -5.0f; + EXPECT_TRUE(GainController2::Validate(config)); +} + +TEST(GainController2, + CheckGetRecommendedInputVolumeWhenInputVolumeControllerNotEnabled) { + constexpr float kHighInputLevel = 32767.0f; + constexpr float kLowInputLevel = 1000.0f; + constexpr int kInitialInputVolume = 100; + constexpr int kNumChannels = 2; + constexpr int kNumFrames = 5; + constexpr int kSampleRateHz = 16000; + + Agc2Config config; + config.input_volume_controller.enabled = false; + + auto gain_controller = std::make_unique( + config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels, + /*use_internal_vad=*/true); + + EXPECT_FALSE(gain_controller->recommended_input_volume().has_value()); + + // Run AGC for a signal with no clipping or detected speech. + RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames, + kSampleRateHz, kNumChannels, kInitialInputVolume); + + EXPECT_FALSE(gain_controller->recommended_input_volume().has_value()); + + // Run AGC for a signal with clipping. 
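+  // kHighInputLevel is full-scale int16, so every sample clips; even so, no
+  // input volume recommendation is expected because the input volume
+  // controller is disabled in this test.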
+ RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames, + kSampleRateHz, kNumChannels, kInitialInputVolume); + + EXPECT_FALSE(gain_controller->recommended_input_volume().has_value()); +} + +TEST( + GainController2, + CheckGetRecommendedInputVolumeWhenInputVolumeControllerNotEnabledAndSpecificConfigUsed) { + constexpr float kHighInputLevel = 32767.0f; + constexpr float kLowInputLevel = 1000.0f; + constexpr int kInitialInputVolume = 100; + constexpr int kNumChannels = 2; + constexpr int kNumFrames = 5; + constexpr int kSampleRateHz = 16000; + + Agc2Config config; + config.input_volume_controller.enabled = false; + + auto gain_controller = std::make_unique( + config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels, + /*use_internal_vad=*/true); + + EXPECT_FALSE(gain_controller->recommended_input_volume().has_value()); + + // Run AGC for a signal with no clipping or detected speech. + RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames, + kSampleRateHz, kNumChannels, kInitialInputVolume); + + EXPECT_FALSE(gain_controller->recommended_input_volume().has_value()); + + // Run AGC for a signal with clipping. + RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames, + kSampleRateHz, kNumChannels, kInitialInputVolume); + + EXPECT_FALSE(gain_controller->recommended_input_volume().has_value()); +} + +TEST(GainController2, + CheckGetRecommendedInputVolumeWhenInputVolumeControllerEnabled) { + constexpr float kHighInputLevel = 32767.0f; + constexpr float kLowInputLevel = 1000.0f; + constexpr int kInitialInputVolume = 100; + constexpr int kNumChannels = 2; + constexpr int kNumFrames = 5; + constexpr int kSampleRateHz = 16000; + + Agc2Config config; + config.input_volume_controller.enabled = true; + config.adaptive_digital.enabled = true; + + auto gain_controller = std::make_unique( + config, InputVolumeControllerConfig{}, kSampleRateHz, kNumChannels, + /*use_internal_vad=*/true); + + EXPECT_FALSE(gain_controller->recommended_input_volume().has_value()); + + // Run AGC for a signal with no clipping or detected speech. + RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames, + kSampleRateHz, kNumChannels, kInitialInputVolume); + + EXPECT_TRUE(gain_controller->recommended_input_volume().has_value()); + + // Run AGC for a signal with clipping. + RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames, + kSampleRateHz, kNumChannels, kInitialInputVolume); + + EXPECT_TRUE(gain_controller->recommended_input_volume().has_value()); +} + +TEST( + GainController2, + CheckGetRecommendedInputVolumeWhenInputVolumeControllerEnabledAndSpecificConfigUsed) { + constexpr float kHighInputLevel = 32767.0f; + constexpr float kLowInputLevel = 1000.0f; + constexpr int kInitialInputVolume = 100; + constexpr int kNumChannels = 2; + constexpr int kNumFrames = 5; + constexpr int kSampleRateHz = 16000; + + Agc2Config config; + config.input_volume_controller.enabled = true; + config.adaptive_digital.enabled = true; + + auto gain_controller = std::make_unique( + config, kTestInputVolumeControllerConfig, kSampleRateHz, kNumChannels, + /*use_internal_vad=*/true); + + EXPECT_FALSE(gain_controller->recommended_input_volume().has_value()); + + // Run AGC for a signal with no clipping or detected speech. + RunAgc2WithConstantInput(*gain_controller, kLowInputLevel, kNumFrames, + kSampleRateHz, kNumChannels, kInitialInputVolume); + + EXPECT_TRUE(gain_controller->recommended_input_volume().has_value()); + + // Run AGC for a signal with clipping. 
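+  // With the input volume controller enabled, a recommendation is expected
+  // for the clipping input as well.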
+ RunAgc2WithConstantInput(*gain_controller, kHighInputLevel, kNumFrames, + kSampleRateHz, kNumChannels, kInitialInputVolume); + + EXPECT_TRUE(gain_controller->recommended_input_volume().has_value()); +} + +// Checks that the default config is applied. +TEST(GainController2, ApplyDefaultConfig) { + auto gain_controller2 = std::make_unique( + Agc2Config{}, InputVolumeControllerConfig{}, + /*sample_rate_hz=*/16000, /*num_channels=*/2, + /*use_internal_vad=*/true); + EXPECT_TRUE(gain_controller2.get()); +} + +TEST(GainController2FixedDigital, GainShouldChangeOnSetGain) { + constexpr float kInputLevel = 1000.0f; + constexpr size_t kNumFrames = 5; + constexpr size_t kSampleRateHz = 8000; + constexpr float kGain0Db = 0.0f; + constexpr float kGain20Db = 20.0f; + + auto agc2_fixed = CreateAgc2FixedDigitalMode(kGain0Db, kSampleRateHz); + + // Signal level is unchanged with 0 db gain. + EXPECT_FLOAT_EQ(RunAgc2WithConstantInput(*agc2_fixed, kInputLevel, kNumFrames, + kSampleRateHz), + kInputLevel); + + // +20 db should increase signal by a factor of 10. + agc2_fixed->SetFixedGainDb(kGain20Db); + EXPECT_FLOAT_EQ(RunAgc2WithConstantInput(*agc2_fixed, kInputLevel, kNumFrames, + kSampleRateHz), + kInputLevel * 10); +} + +TEST(GainController2FixedDigital, ChangeFixedGainShouldBeFastAndTimeInvariant) { + // Number of frames required for the fixed gain controller to adapt on the + // input signal when the gain changes. + constexpr size_t kNumFrames = 5; + + constexpr float kInputLevel = 1000.0f; + constexpr size_t kSampleRateHz = 8000; + constexpr float kGainDbLow = 0.0f; + constexpr float kGainDbHigh = 25.0f; + static_assert(kGainDbLow < kGainDbHigh, ""); + + auto agc2_fixed = CreateAgc2FixedDigitalMode(kGainDbLow, kSampleRateHz); + + // Start with a lower gain. + const float output_level_pre = RunAgc2WithConstantInput( + *agc2_fixed, kInputLevel, kNumFrames, kSampleRateHz); + + // Increase gain. + agc2_fixed->SetFixedGainDb(kGainDbHigh); + static_cast(RunAgc2WithConstantInput(*agc2_fixed, kInputLevel, + kNumFrames, kSampleRateHz)); + + // Back to the lower gain. + agc2_fixed->SetFixedGainDb(kGainDbLow); + const float output_level_post = RunAgc2WithConstantInput( + *agc2_fixed, kInputLevel, kNumFrames, kSampleRateHz); + + EXPECT_EQ(output_level_pre, output_level_post); +} + +class FixedDigitalTest + : public ::testing::TestWithParam> { + protected: + float gain_db_min() const { return std::get<0>(GetParam()); } + float gain_db_max() const { return std::get<1>(GetParam()); } + int sample_rate_hz() const { return std::get<2>(GetParam()); } + bool saturation_expected() const { return std::get<3>(GetParam()); } +}; + +TEST_P(FixedDigitalTest, CheckSaturationBehaviorWithLimiter) { + for (const float gain_db : test::LinSpace(gain_db_min(), gain_db_max(), 10)) { + SCOPED_TRACE(gain_db); + auto agc2_fixed = CreateAgc2FixedDigitalMode(gain_db, sample_rate_hz()); + const float processed_sample = + RunAgc2WithConstantInput(*agc2_fixed, /*input_level=*/32767.0f, + /*num_frames=*/5, sample_rate_hz()); + if (saturation_expected()) { + EXPECT_FLOAT_EQ(processed_sample, 32767.0f); + } else { + EXPECT_LT(processed_sample, 32767.0f); + } + } +} + +static_assert(test::kLimiterMaxInputLevelDbFs < 10, ""); +INSTANTIATE_TEST_SUITE_P( + GainController2, + FixedDigitalTest, + ::testing::Values( + // When gain < `test::kLimiterMaxInputLevelDbFs`, the limiter will not + // saturate the signal (at any sample rate). 
+ std::make_tuple(0.1f, + test::kLimiterMaxInputLevelDbFs - 0.01f, + 8000, + false), + std::make_tuple(0.1, + test::kLimiterMaxInputLevelDbFs - 0.01f, + 48000, + false), + // When gain > `test::kLimiterMaxInputLevelDbFs`, the limiter will + // saturate the signal (at any sample rate). + std::make_tuple(test::kLimiterMaxInputLevelDbFs + 0.01f, + 10.0f, + 8000, + true), + std::make_tuple(test::kLimiterMaxInputLevelDbFs + 0.01f, + 10.0f, + 48000, + true))); + +// Processes a test audio file and checks that the gain applied at the end of +// the recording is close to the expected value. +TEST(GainController2, CheckFinalGainWithAdaptiveDigitalController) { + constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz; + constexpr int kStereo = 2; + + // Create AGC2 enabling only the adaptive digital controller. + Agc2Config config; + config.fixed_digital.gain_db = 0.0f; + config.adaptive_digital.enabled = true; + GainController2 agc2(config, /*input_volume_controller_config=*/{}, + kSampleRateHz, kStereo, + /*use_internal_vad=*/true); + + test::InputAudioFile input_file( + test::GetApmCaptureTestVectorFileName(kSampleRateHz), + /*loop_at_end=*/true); + const StreamConfig stream_config(kSampleRateHz, kStereo); + + // Init buffers. + constexpr int kFrameDurationMs = 10; + std::vector frame(kStereo * stream_config.num_frames()); + AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo, + kSampleRateHz, kStereo); + + // Simulate. + constexpr float kGainDb = -6.0f; + const float gain = std::pow(10.0f, kGainDb / 20.0f); + constexpr int kDurationMs = 10000; + constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs; + for (int i = 0; i < kNumFramesToProcess; ++i) { + ReadFloatSamplesFromStereoFile(stream_config.num_frames(), + stream_config.num_channels(), &input_file, + frame); + // Apply a fixed gain to the input audio. + for (float& x : frame) { + x *= gain; + } + test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer); + agc2.Process(/*speech_probability=*/absl::nullopt, + /*input_volume_changed=*/false, &audio_buffer); + } + + // Estimate the applied gain by processing a probing frame. + SetAudioBufferSamples(/*value=*/1.0f, audio_buffer); + agc2.Process(/*speech_probability=*/absl::nullopt, + /*input_volume_changed=*/false, &audio_buffer); + const float applied_gain_db = + 20.0f * std::log10(audio_buffer.channels_const()[0][0]); + + constexpr float kExpectedGainDb = 5.6f; + constexpr float kToleranceDb = 0.3f; + EXPECT_NEAR(applied_gain_db, kExpectedGainDb, kToleranceDb); +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Checks that `GainController2` crashes in debug mode if it runs its internal +// VAD and the speech probability values are provided by the caller. +TEST(GainController2DeathTest, + DebugCrashIfUseInternalVadAndSpeechProbabilityGiven) { + constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz; + constexpr int kStereo = 2; + AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo, + kSampleRateHz, kStereo); + // Create AGC2 so that the interval VAD is also created. 
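+  // With adaptive digital enabled and `use_internal_vad` set to true, the
+  // constructor creates the internal VAD, so calling Process() with a
+  // caller-provided speech probability must trigger the RTC_DCHECK.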
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Checks that `GainController2` crashes in debug mode if it runs its internal
+// VAD and the speech probability values are provided by the caller.
+TEST(GainController2DeathTest,
+     DebugCrashIfUseInternalVadAndSpeechProbabilityGiven) {
+  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
+  constexpr int kStereo = 2;
+  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
+                           kSampleRateHz, kStereo);
+  // Create AGC2 so that the internal VAD is also created.
+  GainController2 agc2(/*config=*/{.adaptive_digital = {.enabled = true}},
+                       /*input_volume_controller_config=*/{}, kSampleRateHz,
+                       kStereo,
+                       /*use_internal_vad=*/true);
+
+  EXPECT_DEATH(agc2.Process(/*speech_probability=*/0.123f,
+                            /*input_volume_changed=*/false, &audio_buffer),
+               "");
+}
+#endif
+
+// Processes a test audio file and checks that the injected speech probability
+// is not ignored when the internal VAD is not used.
+TEST(GainController2,
+     CheckInjectedVadProbabilityUsedWithAdaptiveDigitalController) {
+  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
+  constexpr int kStereo = 2;
+
+  // Create AGC2 enabling only the adaptive digital controller.
+  Agc2Config config;
+  config.fixed_digital.gain_db = 0.0f;
+  config.adaptive_digital.enabled = true;
+  GainController2 agc2(config, /*input_volume_controller_config=*/{},
+                       kSampleRateHz, kStereo,
+                       /*use_internal_vad=*/false);
+  GainController2 agc2_reference(config, /*input_volume_controller_config=*/{},
+                                 kSampleRateHz, kStereo,
+                                 /*use_internal_vad=*/true);
+
+  test::InputAudioFile input_file(
+      test::GetApmCaptureTestVectorFileName(kSampleRateHz),
+      /*loop_at_end=*/true);
+  const StreamConfig stream_config(kSampleRateHz, kStereo);
+
+  // Init buffers.
+  constexpr int kFrameDurationMs = 10;
+  std::vector<float> frame(kStereo * stream_config.num_frames());
+  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
+                           kSampleRateHz, kStereo);
+  AudioBuffer audio_buffer_reference(kSampleRateHz, kStereo, kSampleRateHz,
+                                     kStereo, kSampleRateHz, kStereo);
+  // Simulate.
+  constexpr float kGainDb = -6.0f;
+  const float gain = std::pow(10.0f, kGainDb / 20.0f);
+  constexpr int kDurationMs = 10000;
+  constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs;
+  constexpr float kSpeechProbabilities[] = {1.0f, 0.3f};
+  constexpr float kEpsilon = 0.0001f;
+  bool all_samples_zero = true;
+  bool all_samples_equal = true;
+  for (int i = 0, j = 0; i < kNumFramesToProcess; ++i, j = 1 - j) {
+    ReadFloatSamplesFromStereoFile(stream_config.num_frames(),
+                                   stream_config.num_channels(), &input_file,
+                                   frame);
+    // Apply a fixed gain to the input audio.
+    for (float& x : frame) {
+      x *= gain;
+    }
+    test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
+    agc2.Process(kSpeechProbabilities[j], /*input_volume_changed=*/false,
+                 &audio_buffer);
+    test::CopyVectorToAudioBuffer(stream_config, frame,
+                                  &audio_buffer_reference);
+    agc2_reference.Process(/*speech_probability=*/absl::nullopt,
+                           /*input_volume_changed=*/false,
+                           &audio_buffer_reference);
+    // Check the output buffers.
+    for (int i = 0; i < kStereo; ++i) {
+      for (int j = 0; j < static_cast<int>(audio_buffer.num_frames()); ++j) {
+        all_samples_zero &=
+            fabs(audio_buffer.channels_const()[i][j]) < kEpsilon;
+        all_samples_equal &=
+            fabs(audio_buffer.channels_const()[i][j] -
+                 audio_buffer_reference.channels_const()[i][j]) < kEpsilon;
+      }
+    }
+  }
+  EXPECT_FALSE(all_samples_zero);
+  EXPECT_FALSE(all_samples_equal);
+}
+
+// Processes a test audio file and checks that the output is equal when
+// an injected speech probability from `VoiceActivityDetectorWrapper` and
+// the speech probability computed by the internal VAD are the same.
+TEST(GainController2,
+     CheckEqualResultFromInjectedVadProbabilityWithAdaptiveDigitalController) {
+  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
+  constexpr int kStereo = 2;
+
+  // Create AGC2 enabling only the adaptive digital controller.
+  Agc2Config config;
+  config.fixed_digital.gain_db = 0.0f;
+  config.adaptive_digital.enabled = true;
+  GainController2 agc2(config, /*input_volume_controller_config=*/{},
+                       kSampleRateHz, kStereo,
+                       /*use_internal_vad=*/false);
+  GainController2 agc2_reference(config, /*input_volume_controller_config=*/{},
+                                 kSampleRateHz, kStereo,
+                                 /*use_internal_vad=*/true);
+  VoiceActivityDetectorWrapper vad(GetAvailableCpuFeatures(), kSampleRateHz);
+  test::InputAudioFile input_file(
+      test::GetApmCaptureTestVectorFileName(kSampleRateHz),
+      /*loop_at_end=*/true);
+  const StreamConfig stream_config(kSampleRateHz, kStereo);
+
+  // Init buffers.
+  constexpr int kFrameDurationMs = 10;
+  std::vector<float> frame(kStereo * stream_config.num_frames());
+  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
+                           kSampleRateHz, kStereo);
+  AudioBuffer audio_buffer_reference(kSampleRateHz, kStereo, kSampleRateHz,
+                                     kStereo, kSampleRateHz, kStereo);
+
+  // Simulate.
+  constexpr float kGainDb = -6.0f;
+  const float gain = std::pow(10.0f, kGainDb / 20.0f);
+  constexpr int kDurationMs = 10000;
+  constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs;
+  for (int i = 0; i < kNumFramesToProcess; ++i) {
+    ReadFloatSamplesFromStereoFile(stream_config.num_frames(),
+                                   stream_config.num_channels(), &input_file,
+                                   frame);
+    // Apply a fixed gain to the input audio.
+    for (float& x : frame) {
+      x *= gain;
+    }
+    test::CopyVectorToAudioBuffer(stream_config, frame,
+                                  &audio_buffer_reference);
+    agc2_reference.Process(absl::nullopt, /*input_volume_changed=*/false,
+                           &audio_buffer_reference);
+    test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
+    float speech_probability = vad.Analyze(AudioFrameView<float>(
+        audio_buffer.channels(), audio_buffer.num_channels(),
+        audio_buffer.num_frames()));
+    agc2.Process(speech_probability, /*input_volume_changed=*/false,
+                 &audio_buffer);
+    // Check the output buffer.
+    for (int i = 0; i < kStereo; ++i) {
+      for (int j = 0; j < static_cast<int>(audio_buffer.num_frames()); ++j) {
+        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[i][j],
+                        audio_buffer_reference.channels_const()[i][j]);
+      }
+    }
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc b/third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc
new file mode 100644
index 0000000000..3b4740f6a5
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/high_pass_filter.h"
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+// [B,A] = butter(2,100/8000,'high')
+constexpr CascadedBiQuadFilter::BiQuadCoefficients
+    kHighPassFilterCoefficients16kHz = {{0.97261f, -1.94523f, 0.97261f},
+                                        {-1.94448f, 0.94598f}};
+
+// [B,A] = butter(2,100/16000,'high')
+constexpr CascadedBiQuadFilter::BiQuadCoefficients
+    kHighPassFilterCoefficients32kHz = {{0.98621f, -1.97242f, 0.98621f},
+                                        {-1.97223f, 0.97261f}};
+
+// [B,A] = butter(2,100/24000,'high')
+constexpr CascadedBiQuadFilter::BiQuadCoefficients
+    kHighPassFilterCoefficients48kHz = {{0.99079f, -1.98157f, 0.99079f},
+                                        {-1.98149f, 0.98166f}};
+
+constexpr size_t kNumberOfHighPassBiQuads = 1;
+
+const CascadedBiQuadFilter::BiQuadCoefficients& ChooseCoefficients(
+    int sample_rate_hz) {
+  switch (sample_rate_hz) {
+    case 16000:
+      return kHighPassFilterCoefficients16kHz;
+    case 32000:
+      return kHighPassFilterCoefficients32kHz;
+    case 48000:
+      return kHighPassFilterCoefficients48kHz;
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+  RTC_DCHECK_NOTREACHED();
+  return kHighPassFilterCoefficients16kHz;
+}
+
+}  // namespace
+
+HighPassFilter::HighPassFilter(int sample_rate_hz, size_t num_channels)
+    : sample_rate_hz_(sample_rate_hz) {
+  filters_.resize(num_channels);
+  const auto& coefficients = ChooseCoefficients(sample_rate_hz_);
+  for (size_t k = 0; k < filters_.size(); ++k) {
+    filters_[k].reset(
+        new CascadedBiQuadFilter(coefficients, kNumberOfHighPassBiQuads));
+  }
+}
+
+HighPassFilter::~HighPassFilter() = default;
+
+void HighPassFilter::Process(AudioBuffer* audio, bool use_split_band_data) {
+  RTC_DCHECK(audio);
+  RTC_DCHECK_EQ(filters_.size(), audio->num_channels());
+  if (use_split_band_data) {
+    for (size_t k = 0; k < audio->num_channels(); ++k) {
+      rtc::ArrayView<float> channel_data = rtc::ArrayView<float>(
+          audio->split_bands(k)[0], audio->num_frames_per_band());
+      filters_[k]->Process(channel_data);
+    }
+  } else {
+    for (size_t k = 0; k < audio->num_channels(); ++k) {
+      rtc::ArrayView<float> channel_data =
+          rtc::ArrayView<float>(&audio->channels()[k][0], audio->num_frames());
+      filters_[k]->Process(channel_data);
+    }
+  }
+}
+
+void HighPassFilter::Process(std::vector<std::vector<float>>* audio) {
+  RTC_DCHECK_EQ(filters_.size(), audio->size());
+  for (size_t k = 0; k < audio->size(); ++k) {
+    filters_[k]->Process((*audio)[k]);
+  }
+}
+
+void HighPassFilter::Reset() {
+  for (size_t k = 0; k < filters_.size(); ++k) {
+    filters_[k]->Reset();
+  }
+}
+
+void HighPassFilter::Reset(size_t num_channels) {
+  const size_t old_num_channels = filters_.size();
+  filters_.resize(num_channels);
+  if (filters_.size() < old_num_channels) {
+    Reset();
+  } else {
+    for (size_t k = 0; k < old_num_channels; ++k) {
+      filters_[k]->Reset();
+    }
+    const auto& coefficients = ChooseCoefficients(sample_rate_hz_);
+    for (size_t k = old_num_channels; k < filters_.size(); ++k) {
+      filters_[k].reset(
+          new CascadedBiQuadFilter(coefficients, kNumberOfHighPassBiQuads));
+    }
+  }
+}
+
+}  // namespace webrtc
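Each coefficient set above is one second-order section of a 100 Hz Butterworth high-pass: `{b0, b1, b2}` is the numerator and `{a1, a2}` the denominator with a0 normalized to 1, generated MATLAB-style via `butter(2, 100/(fs/2), 'high')`. A minimal direct-form-I sketch of how one such section filters samples, assuming that coefficient layout (our own illustration, not the `CascadedBiQuadFilter` implementation):

```cpp
#include <cstddef>

// One biquad section: y[n] = b0*x[n] + b1*x[n-1] + b2*x[n-2]
//                            - a1*y[n-1] - a2*y[n-2], with a0 == 1.
struct BiQuad {
  float b[3];                                    // {b0, b1, b2}.
  float a[2];                                    // {a1, a2}.
  float x1 = 0.f, x2 = 0.f, y1 = 0.f, y2 = 0.f;  // Delay-line state.

  float ProcessSample(float x) {
    const float y = b[0] * x + b[1] * x1 + b[2] * x2 - a[0] * y1 - a[1] * y2;
    x2 = x1;
    x1 = x;
    y2 = y1;
    y1 = y;
    return y;
  }
};

// In-place filtering, e.g. with the 16 kHz coefficients above:
//   BiQuad hpf{{0.97261f, -1.94523f, 0.97261f}, {-1.94448f, 0.94598f}};
void FilterInPlace(BiQuad& biquad, float* data, size_t size) {
  for (size_t i = 0; i < size; ++i) {
    data[i] = biquad.ProcessSample(data[i]);
  }
}
```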
diff --git a/third_party/libwebrtc/modules/audio_processing/high_pass_filter.h b/third_party/libwebrtc/modules/audio_processing/high_pass_filter.h
new file mode 100644
index 0000000000..7e7c370cd1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/high_pass_filter.h
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
+
+namespace webrtc {
+
+class AudioBuffer;
+
+class HighPassFilter {
+ public:
+  HighPassFilter(int sample_rate_hz, size_t num_channels);
+  ~HighPassFilter();
+  HighPassFilter(const HighPassFilter&) = delete;
+  HighPassFilter& operator=(const HighPassFilter&) = delete;
+
+  void Process(AudioBuffer* audio, bool use_split_band_data);
+  void Process(std::vector<std::vector<float>>* audio);
+  void Reset();
+  void Reset(size_t num_channels);
+
+  int sample_rate_hz() const { return sample_rate_hz_; }
+  size_t num_channels() const { return filters_.size(); }
+
+ private:
+  const int sample_rate_hz_;
+  std::vector<std::unique_ptr<CascadedBiQuadFilter>> filters_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/high_pass_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/high_pass_filter_gn/moz.build
new file mode 100644
index 0000000000..4c8aac9c3d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/high_pass_filter_gn/moz.build
@@ -0,0 +1,232 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/high_pass_filter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + 
"winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("high_pass_filter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/high_pass_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/high_pass_filter_unittest.cc new file mode 100644 index 0000000000..9f3c8fe595 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/high_pass_filter_unittest.cc @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/high_pass_filter.h" + +#include + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +// Process one frame of data via the AudioBuffer interface and produce the +// output. 
+std::vector<float> ProcessOneFrameAsAudioBuffer(
+    const std::vector<float>& frame_input,
+    const StreamConfig& stream_config,
+    HighPassFilter* high_pass_filter) {
+  AudioBuffer audio_buffer(
+      stream_config.sample_rate_hz(), stream_config.num_channels(),
+      stream_config.sample_rate_hz(), stream_config.num_channels(),
+      stream_config.sample_rate_hz(), stream_config.num_channels());
+
+  test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer);
+  high_pass_filter->Process(&audio_buffer, /*use_split_band_data=*/false);
+  std::vector<float> frame_output;
+  test::ExtractVectorFromAudioBuffer(stream_config, &audio_buffer,
+                                     &frame_output);
+  return frame_output;
+}
+
+// Process one frame of data via the vector interface and produce the output.
+std::vector<float> ProcessOneFrameAsVector(
+    const std::vector<float>& frame_input,
+    const StreamConfig& stream_config,
+    HighPassFilter* high_pass_filter) {
+  std::vector<std::vector<float>> process_vector(
+      stream_config.num_channels(),
+      std::vector<float>(stream_config.num_frames()));
+
+  for (size_t k = 0; k < stream_config.num_frames(); ++k) {
+    for (size_t channel = 0; channel < stream_config.num_channels();
+         ++channel) {
+      process_vector[channel][k] =
+          frame_input[k * stream_config.num_channels() + channel];
+    }
+  }
+
+  high_pass_filter->Process(&process_vector);
+
+  std::vector<float> output;
+  for (size_t k = 0; k < stream_config.num_frames(); ++k) {
+    for (size_t channel = 0; channel < stream_config.num_channels();
+         ++channel) {
+      output.push_back(process_vector[channel][k]);
+    }
+  }
+
+  return output;
+}
+
+// Processes a specified amount of frames, verifies the results and reports
+// any errors.
+void RunBitexactnessTest(int num_channels,
+                         bool use_audio_buffer_interface,
+                         const std::vector<float>& input,
+                         const std::vector<float>& reference) {
+  const StreamConfig stream_config(16000, num_channels);
+  HighPassFilter high_pass_filter(16000, num_channels);
+
+  std::vector<float> output;
+  const size_t num_frames_to_process =
+      input.size() /
+      (stream_config.num_frames() * stream_config.num_channels());
+  for (size_t frame_no = 0; frame_no < num_frames_to_process; ++frame_no) {
+    std::vector<float> frame_input(
+        input.begin() + stream_config.num_frames() *
+                            stream_config.num_channels() * frame_no,
+        input.begin() + stream_config.num_frames() *
+                            stream_config.num_channels() * (frame_no + 1));
+    if (use_audio_buffer_interface) {
+      output = ProcessOneFrameAsAudioBuffer(frame_input, stream_config,
+                                            &high_pass_filter);
+    } else {
+      output = ProcessOneFrameAsVector(frame_input, stream_config,
+                                       &high_pass_filter);
+    }
+  }
+
+  // Form vector to compare the reference to. Only the last frame processed
+  // is compared in order not having to specify all preceding frames as
+  // inputs. As the algorithm being tested has a memory, testing only
+  // the last frame implicitly also tests the preceding frames.
+  const size_t reference_frame_length =
+      reference.size() / stream_config.num_channels();
+  std::vector<float> output_to_verify;
+  for (size_t channel_no = 0; channel_no < stream_config.num_channels();
+       ++channel_no) {
+    output_to_verify.insert(
+        output_to_verify.end(),
+        output.begin() + channel_no * stream_config.num_frames(),
+        output.begin() + channel_no * stream_config.num_frames() +
+            reference_frame_length);
+  }
+
+  const float kElementErrorBound = 1.0f / 32768.0f;
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      reference_frame_length, num_channels, reference, output_to_verify,
+      kElementErrorBound));
+}
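`ProcessOneFrameAsVector()` above bridges two layouts: the test vectors store samples interleaved (`frame_input[k * num_channels + channel]`), while the `HighPassFilter` vector interface wants one vector per channel. A small round-trip sketch of that conversion (the helper names are ours, not from this file):

```cpp
#include <cstddef>
#include <vector>

// Interleaved [L0 R0 L1 R1 ...] -> per-channel vectors [[L0 L1 ...], [R0 R1 ...]].
std::vector<std::vector<float>> Deinterleave(const std::vector<float>& in,
                                             size_t num_channels) {
  const size_t num_frames = in.size() / num_channels;
  std::vector<std::vector<float>> out(num_channels,
                                      std::vector<float>(num_frames));
  for (size_t k = 0; k < num_frames; ++k) {
    for (size_t ch = 0; ch < num_channels; ++ch) {
      out[ch][k] = in[k * num_channels + ch];
    }
  }
  return out;
}

// Per-channel vectors back to the interleaved layout.
std::vector<float> Interleave(const std::vector<std::vector<float>>& in) {
  const size_t num_channels = in.size();
  const size_t num_frames = in.empty() ? 0 : in[0].size();
  std::vector<float> out;
  out.reserve(num_channels * num_frames);
  for (size_t k = 0; k < num_frames; ++k) {
    for (size_t ch = 0; ch < num_channels; ++ch) {
      out.push_back(in[ch][k]);
    }
  }
  return out;
}
```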
+
+// Method for forming a vector out of an array.
+// TODO(peah): Remove once braced initialization is allowed.
+std::vector<float> CreateVector(
+    const rtc::ArrayView<const float>& array_view) {
+  std::vector<float> v;
+  for (auto value : array_view) {
+    v.push_back(value);
+  }
+  return v;
+}
+}  // namespace
+
+TEST(HighPassFilterAccuracyTest, ResetWithAudioBufferInterface) {
+  const StreamConfig stream_config_stereo(16000, 2);
+  const StreamConfig stream_config_mono(16000, 1);
+  std::vector<float> x_mono(160, 1.f);
+  std::vector<float> x_stereo(320, 1.f);
+  HighPassFilter hpf(16000, 1);
+  std::vector<float> y =
+      ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf);
+  hpf.Reset(2);
+  y = ProcessOneFrameAsAudioBuffer(x_stereo, stream_config_stereo, &hpf);
+  hpf.Reset(1);
+  y = ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf);
+  hpf.Reset();
+  y = ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf);
+}
+
+TEST(HighPassFilterAccuracyTest, ResetWithVectorInterface) {
+  const StreamConfig stream_config_stereo(16000, 2);
+  const StreamConfig stream_config_mono(16000, 1);
+  std::vector<float> x_mono(160, 1.f);
+  std::vector<float> x_stereo(320, 1.f);
+  HighPassFilter hpf(16000, 1);
+  std::vector<float> y =
+      ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf);
+  hpf.Reset(2);
+  y = ProcessOneFrameAsVector(x_stereo, stream_config_stereo, &hpf);
+  hpf.Reset(1);
+  y = ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf);
+  hpf.Reset();
+  y = ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf);
+}
+
+TEST(HighPassFilterAccuracyTest, MonoInitial) {
+  const float kReferenceInput[] = {
+      0.150254f, 0.512488f, -0.631245f, 0.240938f, 0.089080f, -0.365440f,
+      -0.121169f, 0.095748f, 1.000000f, 0.773932f, -0.377232f, 0.848124f,
+      0.202718f, -0.017621f, 0.199738f, -0.057279f, -0.034693f, 0.416303f,
+      0.393761f, 0.396041f, 0.187653f, -0.337438f, 0.200436f, 0.455577f,
+      0.136624f, 0.289150f, 0.203131f, -0.084798f, 0.082124f, -0.220010f,
+      0.248266f, -0.320554f, -0.298701f, -0.226218f, -0.822794f, 0.401962f,
+      0.090876f, -0.210968f, 0.382936f, -0.478291f, -0.028572f, -0.067474f,
+      0.089204f, 0.087430f, -0.241695f, -0.008398f, -0.046076f, 0.175416f,
+      0.305518f, 0.309992f, -0.241352f, 0.021618f, -0.339291f, -0.311173f,
+      -0.001914f, 0.428301f, -0.215087f, 0.103784f, -0.063041f, 0.312250f,
+      -0.304344f, 0.009098f, 0.154406f, 0.307571f, 0.431537f, 0.024014f,
+      -0.416832f, -0.207440f, -0.296664f, 0.656846f, -0.172033f, 0.209054f,
+      -0.053772f, 0.248326f, -0.213741f, -0.391871f, -0.397490f, 0.136428f,
+      -0.049568f, -0.054788f, 0.396633f, 0.081485f, 0.055279f, 0.443690f,
+      -0.224812f, 0.194675f, 0.233369f, -0.068107f, 0.060270f, -0.325801f,
+      -0.320801f, 0.029308f, 0.201837f, 0.722528f, -0.186366f, 0.052351f,
+      -0.023053f, -0.540192f, -0.122671f, -0.501532f, 0.234847f, -0.248165f,
+      0.027971f, -0.152171f, 0.084820f, -0.167764f, 0.136923f, 0.206619f,
+      0.478395f, -0.054249f, -0.597574f, -0.234627f, 0.378548f, -0.299619f,
+      0.268543f, 0.034666f, 0.401492f, -0.547983f, -0.055248f, -0.337538f,
+      0.812657f, 0.230611f, 0.385360f, -0.295713f, -0.130957f, -0.076143f,
+      0.306960f, -0.077653f, 0.196049f, -0.573390f, -0.098885f, -0.230155f,
+      -0.440716f, 0.141956f, 0.078802f, 0.009356f, -0.372703f, 0.315083f,
+      0.097859f, -0.083575f, 0.006397f, -0.073216f, -0.489105f, -0.079827f,
+      -0.232329f, -0.273644f, -0.323162f, -0.149105f, -0.559646f, 0.269458f,
+      0.145333f, -0.005597f, -0.009717f, -0.223051f, 0.284676f, -0.037228f,
+      -0.199679f, 0.377651f, -0.062813f, -0.164607f};
+  const float kReference[] = {0.146139f, 0.490336f, -0.649520f, 0.233881f,
+                              0.073214f, -0.373256f, -0.115394f, 0.102109f,
+                              0.976217f, 0.702270f, -0.457697f, 0.757116f};
+
+  for (bool use_audio_buffer_interface : {true, false}) {
+    RunBitexactnessTest(
+        1, use_audio_buffer_interface,
+        CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
+        CreateVector(rtc::ArrayView<const float>(kReference)));
+  }
+}
+
+TEST(HighPassFilterAccuracyTest, MonoConverged) {
+  const float kReferenceInput[] = {
+      0.150254f, 0.512488f, -0.631245f, 0.240938f, 0.089080f, -0.365440f,
+      -0.121169f, 0.095748f, 1.000000f, 0.773932f, -0.377232f, 0.848124f,
+      0.202718f, -0.017621f, 0.199738f, -0.057279f, -0.034693f, 0.416303f,
+      0.393761f, 0.396041f, 0.187653f, -0.337438f, 0.200436f, 0.455577f,
+      0.136624f, 0.289150f, 0.203131f, -0.084798f, 0.082124f, -0.220010f,
+      0.248266f, -0.320554f, -0.298701f, -0.226218f, -0.822794f, 0.401962f,
+      0.090876f, -0.210968f, 0.382936f, -0.478291f, -0.028572f, -0.067474f,
+      0.089204f, 0.087430f, -0.241695f, -0.008398f, -0.046076f, 0.175416f,
+      0.305518f, 0.309992f, -0.241352f, 0.021618f, -0.339291f, -0.311173f,
+      -0.001914f, 0.428301f, -0.215087f, 0.103784f, -0.063041f, 0.312250f,
+      -0.304344f, 0.009098f, 0.154406f, 0.307571f, 0.431537f, 0.024014f,
+      -0.416832f, -0.207440f, -0.296664f, 0.656846f, -0.172033f, 0.209054f,
+      -0.053772f, 0.248326f, -0.213741f, -0.391871f, -0.397490f, 0.136428f,
+      -0.049568f, -0.054788f, 0.396633f, 0.081485f, 0.055279f, 0.443690f,
+      -0.224812f, 0.194675f, 0.233369f, -0.068107f, 0.060270f, -0.325801f,
+      -0.320801f, 0.029308f, 0.201837f, 0.722528f, -0.186366f, 0.052351f,
+      -0.023053f, -0.540192f, -0.122671f, -0.501532f, 0.234847f, -0.248165f,
+      0.027971f, -0.152171f, 0.084820f, -0.167764f, 0.136923f, 0.206619f,
+      0.478395f, -0.054249f, -0.597574f, -0.234627f, 0.378548f, -0.299619f,
+      0.268543f, 0.034666f, 0.401492f, -0.547983f, -0.055248f, -0.337538f,
+      0.812657f, 0.230611f, 0.385360f, -0.295713f, -0.130957f, -0.076143f,
+      0.306960f, -0.077653f, 0.196049f, -0.573390f, -0.098885f, -0.230155f,
+      -0.440716f, 0.141956f, 0.078802f, 0.009356f, -0.372703f, 0.315083f,
+      0.097859f, -0.083575f, 0.006397f, -0.073216f, -0.489105f, -0.079827f,
+      -0.232329f, -0.273644f, -0.323162f, -0.149105f, -0.559646f, 0.269458f,
+      0.145333f, -0.005597f, -0.009717f, -0.223051f, 0.284676f, -0.037228f,
+      -0.199679f, 0.377651f, -0.062813f, -0.164607f, -0.082091f, -0.236957f,
+      -0.313025f, 0.705903f, 0.462637f, 0.085942f, -0.351308f, -0.241859f,
+      -0.049333f, 0.221165f, -0.372235f, -0.651092f, -0.404957f, 0.093201f,
+      0.109366f, 0.126224f, -0.036409f, 0.051333f, -0.133063f, 0.240896f,
+      -0.380532f, 0.127160f, -0.237176f, -0.093586f, 0.154478f, 0.290379f,
+      -0.312329f, 0.352297f, 0.184480f, -0.018965f, -0.054555f, -0.060811f,
+      -0.084705f, 0.006440f, 0.014333f, 0.230847f, 0.426721f, 0.130481f,
+      -0.058605f, 0.174712f, 0.051204f, -0.287773f, 0.265265f, 0.085810f,
+      0.037775f, 0.143988f, 0.073051f, -0.263103f, -0.045366f, -0.040816f,
+      -0.148673f, 0.470072f, -0.244727f, -0.135204f, -0.198973f, -0.328139f,
+      -0.053722f, -0.076590f, 0.427586f, -0.069591f, -0.297399f, 0.448094f,
+      0.345037f, -0.064170f, -0.420903f, -0.124253f, -0.043578f, 0.077149f,
+      -0.072983f, 0.123916f, 0.109517f, -0.349508f, -0.264912f, -0.207106f,
+      -0.141912f, -0.089586f, 0.003485f, -0.846518f, -0.127715f, 0.347208f,
+      -0.298095f, 0.260935f, 0.097899f, -0.008106f, 0.050987f, -0.437362f,
+      -0.023625f, 0.448230f, 0.027484f, 0.011562f, -0.205167f, -0.008611f,
+      0.064930f, 0.119156f, -0.104183f, -0.066078f, 0.565530f, -0.631108f,
+      0.623029f, 0.094334f, 0.279472f, -0.465059f, -0.164888f, -0.077706f,
+      0.118130f, -0.466746f, 0.131800f, -0.338936f, 0.018497f, 0.182304f,
+      0.091398f, 0.302547f, 0.281153f, -0.181899f, 0.071836f, -0.263911f,
+      -0.369380f, 0.258447f, 0.000014f, -0.015347f, 0.254619f, 0.166159f,
+      0.097865f, 0.349389f, 0.259834f, 0.067003f, -0.192925f, -0.182080f,
+      0.333139f, -0.450434f, -0.006836f, -0.544615f, 0.285183f, 0.240811f,
+      0.000325f, -0.019796f, -0.694804f, 0.162411f, -0.612686f, -0.648134f,
+      0.022338f, -0.265058f, 0.114993f, 0.189185f, 0.239697f, -0.193148f,
+      0.125581f, 0.028122f, 0.230849f, 0.149832f, 0.250919f, -0.036871f,
+      -0.041136f, 0.281627f, -0.593466f, -0.141009f, -0.355074f, -0.106915f,
+      0.181276f, 0.230753f, -0.283631f, -0.131643f, 0.038292f, -0.081563f,
+      0.084345f, 0.111763f, -0.259882f, -0.049416f, -0.595824f, 0.320077f,
+      -0.175802f, -0.336422f, -0.070966f, -0.399242f, -0.005829f, -0.156680f,
+      0.608591f, 0.318150f, -0.697767f, 0.123331f, -0.390716f, -0.071276f,
+      0.045943f, 0.208958f, -0.076304f, 0.440505f, -0.134400f, 0.091525f,
+      0.185763f, 0.023806f, 0.246186f, 0.090323f, -0.219133f, -0.504520f,
+      0.519393f, -0.168939f, 0.028884f, 0.157380f, 0.031745f, -0.252830f,
+      -0.130705f, -0.034901f, 0.413302f, -0.240559f, 0.219279f, 0.086246f,
+      -0.065353f, -0.295376f, -0.079405f, -0.024226f, -0.410629f, 0.053706f,
+      -0.229794f, -0.026336f, 0.093956f, -0.252810f, -0.080555f, 0.097827f,
+      -0.513040f, 0.289508f, 0.677527f, 0.268109f, -0.088244f, 0.119781f,
+      -0.289511f, 0.524778f, 0.262884f, 0.220028f, -0.244767f, 0.089411f,
+      -0.156018f, -0.087030f, -0.159292f, -0.286646f, -0.253953f, -0.058657f,
+      -0.474756f, 0.169797f, -0.032919f, 0.195384f, 0.075355f, 0.138131f,
+      -0.414465f, -0.285118f, -0.124915f, 0.030645f, 0.315431f, -0.081032f,
+      0.352546f, 0.132860f, 0.328112f, 0.035476f, -0.183550f, -0.413984f,
+      0.043452f, 0.228748f, -0.081765f, -0.151125f, -0.086251f, -0.306448f,
+      -0.137774f, -0.050508f, 0.012811f, -0.017824f, 0.170841f, 0.030549f,
+      0.506935f, 0.087197f, 0.504274f, -0.202080f, 0.147146f, -0.072728f,
+      0.167713f, 0.165977f, -0.610894f, -0.370849f, -0.402698f, 0.112297f,
+      0.410855f, -0.091330f, 0.227008f, 0.152454f, -0.293884f, 0.111074f,
+      -0.210121f, 0.423728f, -0.009101f, 0.457188f, -0.118785f, 0.164720f,
+      -0.017547f, -0.565046f, -0.274461f, 0.171169f, -0.015338f, -0.312635f,
+      -0.175044f, 0.069729f, -0.277504f, 0.272454f, -0.179049f, 0.505495f,
+      -0.301774f, 0.055664f, -0.425058f, -0.202222f, -0.165787f, 0.112155f,
+      0.263284f, 0.083972f, -0.104256f, 0.227892f, 0.223253f, 0.033592f,
+      0.159638f, 0.115358f, -0.275811f, 0.212265f, -0.183658f, -0.168768f};
+
+  const float kReference[] = {-0.248836f, -0.086982f, 0.083715f, -0.036787f,
+                              0.127212f, 0.147464f, -0.221733f, -0.004484f,
+                              -0.535107f, 0.385999f, -0.116346f, -0.265302f};
+
+  for (bool use_audio_buffer_interface : {true, false}) {
+    RunBitexactnessTest(
+        1, use_audio_buffer_interface,
+        CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
+        CreateVector(rtc::ArrayView<const float>(kReference)));
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc b/third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc
new file mode 100644
index 0000000000..8f788cb802
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/aec_dump.cc
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/include/aec_dump.h"
+
+namespace webrtc {
+InternalAPMConfig::InternalAPMConfig() = default;
+InternalAPMConfig::InternalAPMConfig(const InternalAPMConfig&) = default;
+InternalAPMConfig::InternalAPMConfig(InternalAPMConfig&&) = default;
+InternalAPMConfig& InternalAPMConfig::operator=(const InternalAPMConfig&) =
+    default;
+
+bool InternalAPMConfig::operator==(const InternalAPMConfig& other) const {
+  return aec_enabled == other.aec_enabled &&
+         aec_delay_agnostic_enabled == other.aec_delay_agnostic_enabled &&
+         aec_drift_compensation_enabled ==
+             other.aec_drift_compensation_enabled &&
+         aec_extended_filter_enabled == other.aec_extended_filter_enabled &&
+         aec_suppression_level == other.aec_suppression_level &&
+         aecm_enabled == other.aecm_enabled &&
+         aecm_comfort_noise_enabled == other.aecm_comfort_noise_enabled &&
+         aecm_routing_mode == other.aecm_routing_mode &&
+         agc_enabled == other.agc_enabled && agc_mode == other.agc_mode &&
+         agc_limiter_enabled == other.agc_limiter_enabled &&
+         hpf_enabled == other.hpf_enabled && ns_enabled == other.ns_enabled &&
+         ns_level == other.ns_level &&
+         transient_suppression_enabled ==
+             other.transient_suppression_enabled &&
+         noise_robust_agc_enabled == other.noise_robust_agc_enabled &&
+         pre_amplifier_enabled == other.pre_amplifier_enabled &&
+         pre_amplifier_fixed_gain_factor ==
+             other.pre_amplifier_fixed_gain_factor &&
+         experiments_description == other.experiments_description;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/include/aec_dump.h b/third_party/libwebrtc/modules/audio_processing/include/aec_dump.h
new file mode 100644
index 0000000000..6f2eb64f3a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/aec_dump.h
@@ -0,0 +1,116 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
+#define MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
+
+#include <stdint.h>
+
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/types/optional.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+// Struct for passing current config from APM without having to
+// include protobuf headers.
+struct InternalAPMConfig {
+  InternalAPMConfig();
+  InternalAPMConfig(const InternalAPMConfig&);
+  InternalAPMConfig(InternalAPMConfig&&);
+
+  InternalAPMConfig& operator=(const InternalAPMConfig&);
+  InternalAPMConfig& operator=(InternalAPMConfig&&) = delete;
+
+  bool operator==(const InternalAPMConfig& other) const;
+
+  bool aec_enabled = false;
+  bool aec_delay_agnostic_enabled = false;
+  bool aec_drift_compensation_enabled = false;
+  bool aec_extended_filter_enabled = false;
+  int aec_suppression_level = 0;
+  bool aecm_enabled = false;
+  bool aecm_comfort_noise_enabled = false;
+  int aecm_routing_mode = 0;
+  bool agc_enabled = false;
+  int agc_mode = 0;
+  bool agc_limiter_enabled = false;
+  bool hpf_enabled = false;
+  bool ns_enabled = false;
+  int ns_level = 0;
+  bool transient_suppression_enabled = false;
+  bool noise_robust_agc_enabled = false;
+  bool pre_amplifier_enabled = false;
+  float pre_amplifier_fixed_gain_factor = 1.f;
+  std::string experiments_description = "";
+};
+
+// An interface for recording configuration and input/output streams
+// of the Audio Processing Module. The recordings are called
+// 'aec-dumps' and are stored in a protobuf format defined in
+// debug.proto.
+// The Write* methods are always safe to call concurrently or
+// otherwise for all implementing subclasses. The intended mode of
+// operation is to create a protobuf object from the input, and send
+// it away to be written to file asynchronously.
+class AecDump {
+ public:
+  struct AudioProcessingState {
+    int delay;
+    int drift;
+    absl::optional<int> applied_input_volume;
+    bool keypress;
+  };
+
+  virtual ~AecDump() = default;
+
+  // Logs Event::Type INIT message.
+  virtual void WriteInitMessage(const ProcessingConfig& api_format,
+                                int64_t time_now_ms) = 0;
+  ABSL_DEPRECATED("")
+  void WriteInitMessage(const ProcessingConfig& api_format) {
+    WriteInitMessage(api_format, 0);
+  }
+
+  // Logs Event::Type STREAM message. To log an input/output pair,
+  // call the AddCapture* and AddAudioProcessingState methods followed
+  // by a WriteCaptureStreamMessage call.
+  virtual void AddCaptureStreamInput(
+      const AudioFrameView<const float>& src) = 0;
+  virtual void AddCaptureStreamOutput(
+      const AudioFrameView<const float>& src) = 0;
+  virtual void AddCaptureStreamInput(const int16_t* const data,
+                                     int num_channels,
+                                     int samples_per_channel) = 0;
+  virtual void AddCaptureStreamOutput(const int16_t* const data,
+                                      int num_channels,
+                                      int samples_per_channel) = 0;
+  virtual void AddAudioProcessingState(const AudioProcessingState& state) = 0;
+  virtual void WriteCaptureStreamMessage() = 0;
+
+  // Logs Event::Type REVERSE_STREAM message.
+  virtual void WriteRenderStreamMessage(const int16_t* const data,
+                                        int num_channels,
+                                        int samples_per_channel) = 0;
+  virtual void WriteRenderStreamMessage(
+      const AudioFrameView<const float>& src) = 0;
+
+  virtual void WriteRuntimeSetting(
+      const AudioProcessing::RuntimeSetting& runtime_setting) = 0;
+
+  // Logs Event::Type CONFIG message.
+  virtual void WriteConfig(const InternalAPMConfig& config) = 0;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_
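Because `AecDump` is an abstract interface, a recorder can be swapped out for a do-nothing stub in tests or in builds without protobuf support. A compressed sketch of such a null implementation, which simply overrides every pure-virtual method declared above as a no-op (our illustration, not a class shipped in this patch):

```cpp
#include "modules/audio_processing/include/aec_dump.h"

namespace {

// No-op AecDump: accepts every logging call and discards the data.
class NullAecDump : public webrtc::AecDump {
 public:
  void WriteInitMessage(const webrtc::ProcessingConfig& api_format,
                        int64_t time_now_ms) override {}
  void AddCaptureStreamInput(
      const webrtc::AudioFrameView<const float>& src) override {}
  void AddCaptureStreamOutput(
      const webrtc::AudioFrameView<const float>& src) override {}
  void AddCaptureStreamInput(const int16_t* const data,
                             int num_channels,
                             int samples_per_channel) override {}
  void AddCaptureStreamOutput(const int16_t* const data,
                              int num_channels,
                              int samples_per_channel) override {}
  void AddAudioProcessingState(const AudioProcessingState& state) override {}
  void WriteCaptureStreamMessage() override {}
  void WriteRenderStreamMessage(const int16_t* const data,
                                int num_channels,
                                int samples_per_channel) override {}
  void WriteRenderStreamMessage(
      const webrtc::AudioFrameView<const float>& src) override {}
  void WriteRuntimeSetting(
      const webrtc::AudioProcessing::RuntimeSetting& setting) override {}
  void WriteConfig(const webrtc::InternalAPMConfig& config) override {}
};

}  // namespace
```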
diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc
new file mode 100644
index 0000000000..7cc4fb75e4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.cc
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/include/audio_frame_proxies.h"
+
+#include "api/audio/audio_frame.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
+  if (!frame || !ap) {
+    return AudioProcessing::Error::kNullPointerError;
+  }
+
+  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_);
+  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_);
+  RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames());
+
+  int result = ap->ProcessStream(frame->data(), input_config, output_config,
+                                 frame->mutable_data());
+
+  AudioProcessingStats stats = ap->GetStatistics();
+
+  if (stats.voice_detected) {
+    frame->vad_activity_ = *stats.voice_detected
+                               ? AudioFrame::VADActivity::kVadActive
+                               : AudioFrame::VADActivity::kVadPassive;
+  }
+
+  return result;
+}
+
+int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
+  if (!frame || !ap) {
+    return AudioProcessing::Error::kNullPointerError;
+  }
+
+  // Must be a native rate.
+  if (frame->sample_rate_hz_ != AudioProcessing::NativeRate::kSampleRate8kHz &&
+      frame->sample_rate_hz_ !=
+          AudioProcessing::NativeRate::kSampleRate16kHz &&
+      frame->sample_rate_hz_ !=
+          AudioProcessing::NativeRate::kSampleRate32kHz &&
+      frame->sample_rate_hz_ !=
+          AudioProcessing::NativeRate::kSampleRate48kHz) {
+    return AudioProcessing::Error::kBadSampleRateError;
+  }
+
+  if (frame->num_channels_ <= 0) {
+    return AudioProcessing::Error::kBadNumberChannelsError;
+  }
+
+  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_);
+  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_);
+
+  int result = ap->ProcessReverseStream(frame->data(), input_config,
+                                        output_config, frame->mutable_data());
+  return result;
+}
+
+}  // namespace webrtc
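A sketch of how this proxy is typically driven on the capture side: wrap 10 ms of int16 audio in an `AudioFrame`, hand it to `ProcessAudioFrame()`, and check the returned error code. Creation of the `AudioProcessing` instance (e.g. via `AudioProcessingBuilder`) is assumed to happen elsewhere, and the parameter values below are illustrative:

```cpp
#include "api/audio/audio_frame.h"
#include "modules/audio_processing/include/audio_frame_proxies.h"
#include "modules/audio_processing/include/audio_processing.h"

int ProcessTenMsCapture(webrtc::AudioProcessing* apm) {
  // 10 ms of mono audio at 16 kHz: 160 samples per channel. Passing a null
  // data pointer makes UpdateFrame() produce a muted (all-zero) frame.
  webrtc::AudioFrame frame;
  frame.UpdateFrame(/*timestamp=*/0, /*data=*/nullptr,
                    /*samples_per_channel=*/160, /*sample_rate_hz=*/16000,
                    webrtc::AudioFrame::kNormalSpeech,
                    webrtc::AudioFrame::kVadUnknown, /*num_channels=*/1);
  // Returns an AudioProcessing error code; kNoError (0) on success.
  return webrtc::ProcessAudioFrame(apm, &frame);
}
```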
diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.h b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.h
new file mode 100644
index 0000000000..5dd111ca2b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_proxies.h
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
+#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
+
+namespace webrtc {
+
+class AudioFrame;
+class AudioProcessing;
+
+// Processes a 10 ms `frame` of the primary audio stream using the provided
+// AudioProcessing object. On the client-side, this is the near-end (or
+// captured) audio. The `sample_rate_hz_`, `num_channels_`, and
+// `samples_per_channel_` members of `frame` must be valid. If changed from the
+// previous call to this function, it will trigger an initialization of the
+// provided AudioProcessing object.
+// The function returns any error codes passed from the AudioProcessing
+// ProcessStream method.
+int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame);
+
+// Processes a 10 ms `frame` of the reverse direction audio stream using the
+// provided AudioProcessing object. The frame may be modified. On the
+// client-side, this is the far-end (or to be rendered) audio. The
+// `sample_rate_hz_`, `num_channels_`, and `samples_per_channel_` members of
+// `frame` must be valid. If changed from the previous call to this function,
+// it will trigger an initialization of the provided AudioProcessing object.
+// The function returns any error codes passed from the AudioProcessing
+// ProcessReverseStream method.
+int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_frame_view.h b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_view.h
new file mode 100644
index 0000000000..164784a7cc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/audio_frame_view.h
@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
+#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Class to pass audio data in T** format, where T is a numeric type.
+template <class T>
+class AudioFrameView {
+ public:
+  // `num_channels` and `channel_size` describe the T**
+  // `audio_samples`. `audio_samples` is assumed to point to a
+  // two-dimensional |num_channels * channel_size| array of floats.
+  AudioFrameView(T* const* audio_samples, int num_channels, int channel_size)
+      : audio_samples_(audio_samples),
+        num_channels_(num_channels),
+        channel_size_(channel_size) {
+    RTC_DCHECK_GE(num_channels_, 0);
+    RTC_DCHECK_GE(channel_size_, 0);
+  }
+
+  // Implicit cast to allow converting Frame<float> to
+  // Frame<const float>.
+  template <class U>
+  AudioFrameView(AudioFrameView<U> other)
+      : audio_samples_(other.data()),
+        num_channels_(other.num_channels()),
+        channel_size_(other.samples_per_channel()) {}
+
+  AudioFrameView() = delete;
+
+  int num_channels() const { return num_channels_; }
+
+  int samples_per_channel() const { return channel_size_; }
+
+  rtc::ArrayView<T> channel(int idx) {
+    RTC_DCHECK_LE(0, idx);
+    RTC_DCHECK_LE(idx, num_channels_);
+    return rtc::ArrayView<T>(audio_samples_[idx], channel_size_);
+  }
+
+  rtc::ArrayView<const T> channel(int idx) const {
+    RTC_DCHECK_LE(0, idx);
+    RTC_DCHECK_LE(idx, num_channels_);
+    return rtc::ArrayView<const T>(audio_samples_[idx], channel_size_);
+  }
+
+  T* const* data() { return audio_samples_; }
+
+ private:
+  T* const* audio_samples_;
+  int num_channels_;
+  int channel_size_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_VIEW_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc b/third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc
new file mode 100644
index 0000000000..13ddcc588a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/audio_processing.cc
@@ -0,0 +1,210 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/include/audio_processing.h"
+
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace {
+
+using Agc1Config = AudioProcessing::Config::GainController1;
+using Agc2Config = AudioProcessing::Config::GainController2;
+
+std::string NoiseSuppressionLevelToString(
+    const AudioProcessing::Config::NoiseSuppression::Level& level) {
+  switch (level) {
+    case AudioProcessing::Config::NoiseSuppression::Level::kLow:
+      return "Low";
+    case AudioProcessing::Config::NoiseSuppression::Level::kModerate:
+      return "Moderate";
+    case AudioProcessing::Config::NoiseSuppression::Level::kHigh:
+      return "High";
+    case AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh:
+      return "VeryHigh";
+  }
+  RTC_CHECK_NOTREACHED();
+}
+
+std::string GainController1ModeToString(const Agc1Config::Mode& mode) {
+  switch (mode) {
+    case Agc1Config::Mode::kAdaptiveAnalog:
+      return "AdaptiveAnalog";
+    case Agc1Config::Mode::kAdaptiveDigital:
+      return "AdaptiveDigital";
+    case Agc1Config::Mode::kFixedDigital:
+      return "FixedDigital";
+  }
+  RTC_CHECK_NOTREACHED();
+}
+
+}  // namespace
+
+constexpr int AudioProcessing::kNativeSampleRatesHz[];
+
+void CustomProcessing::SetRuntimeSetting(
+    AudioProcessing::RuntimeSetting setting) {}
+
+bool Agc1Config::operator==(const Agc1Config& rhs) const {
+  const auto& analog_lhs = analog_gain_controller;
+  const auto& analog_rhs = rhs.analog_gain_controller;
+  return enabled == rhs.enabled && mode == rhs.mode &&
+         target_level_dbfs == rhs.target_level_dbfs &&
+         compression_gain_db == rhs.compression_gain_db &&
+         enable_limiter == rhs.enable_limiter &&
+         analog_lhs.enabled == analog_rhs.enabled &&
+         analog_lhs.startup_min_volume == analog_rhs.startup_min_volume &&
+         analog_lhs.clipped_level_min == analog_rhs.clipped_level_min &&
+         analog_lhs.enable_digital_adaptive ==
+             analog_rhs.enable_digital_adaptive
&& + analog_lhs.clipped_level_step == analog_rhs.clipped_level_step && + analog_lhs.clipped_ratio_threshold == + analog_rhs.clipped_ratio_threshold && + analog_lhs.clipped_wait_frames == analog_rhs.clipped_wait_frames && + analog_lhs.clipping_predictor.mode == + analog_rhs.clipping_predictor.mode && + analog_lhs.clipping_predictor.window_length == + analog_rhs.clipping_predictor.window_length && + analog_lhs.clipping_predictor.reference_window_length == + analog_rhs.clipping_predictor.reference_window_length && + analog_lhs.clipping_predictor.reference_window_delay == + analog_rhs.clipping_predictor.reference_window_delay && + analog_lhs.clipping_predictor.clipping_threshold == + analog_rhs.clipping_predictor.clipping_threshold && + analog_lhs.clipping_predictor.crest_factor_margin == + analog_rhs.clipping_predictor.crest_factor_margin && + analog_lhs.clipping_predictor.use_predicted_step == + analog_rhs.clipping_predictor.use_predicted_step; +} + +bool Agc2Config::AdaptiveDigital::operator==( + const Agc2Config::AdaptiveDigital& rhs) const { + return enabled == rhs.enabled && headroom_db == rhs.headroom_db && + max_gain_db == rhs.max_gain_db && + initial_gain_db == rhs.initial_gain_db && + max_gain_change_db_per_second == rhs.max_gain_change_db_per_second && + max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs; +} + +bool Agc2Config::InputVolumeController::operator==( + const Agc2Config::InputVolumeController& rhs) const { + return enabled == rhs.enabled; +} + +bool Agc2Config::operator==(const Agc2Config& rhs) const { + return enabled == rhs.enabled && + fixed_digital.gain_db == rhs.fixed_digital.gain_db && + adaptive_digital == rhs.adaptive_digital && + input_volume_controller == rhs.input_volume_controller; +} + +bool AudioProcessing::Config::CaptureLevelAdjustment::operator==( + const AudioProcessing::Config::CaptureLevelAdjustment& rhs) const { + return enabled == rhs.enabled && pre_gain_factor == rhs.pre_gain_factor && + post_gain_factor == rhs.post_gain_factor && + analog_mic_gain_emulation == rhs.analog_mic_gain_emulation; +} + +bool AudioProcessing::Config::CaptureLevelAdjustment::AnalogMicGainEmulation:: +operator==(const AudioProcessing::Config::CaptureLevelAdjustment:: + AnalogMicGainEmulation& rhs) const { + return enabled == rhs.enabled && initial_level == rhs.initial_level; +} + +std::string AudioProcessing::Config::ToString() const { + char buf[2048]; + rtc::SimpleStringBuilder builder(buf); + builder << "AudioProcessing::Config{ " + "pipeline: { " + "maximum_internal_processing_rate: " + << pipeline.maximum_internal_processing_rate + << ", multi_channel_render: " << pipeline.multi_channel_render + << ", multi_channel_capture: " << pipeline.multi_channel_capture + << " }, pre_amplifier: { enabled: " << pre_amplifier.enabled + << ", fixed_gain_factor: " << pre_amplifier.fixed_gain_factor + << " },capture_level_adjustment: { enabled: " + << capture_level_adjustment.enabled + << ", pre_gain_factor: " << capture_level_adjustment.pre_gain_factor + << ", post_gain_factor: " << capture_level_adjustment.post_gain_factor + << ", analog_mic_gain_emulation: { enabled: " + << capture_level_adjustment.analog_mic_gain_emulation.enabled + << ", initial_level: " + << capture_level_adjustment.analog_mic_gain_emulation.initial_level + << " }}, high_pass_filter: { enabled: " << high_pass_filter.enabled + << " }, echo_canceller: { enabled: " << echo_canceller.enabled + << ", mobile_mode: " << echo_canceller.mobile_mode + << ", enforce_high_pass_filtering: " + << 
echo_canceller.enforce_high_pass_filtering + << " }, noise_suppression: { enabled: " << noise_suppression.enabled + << ", level: " + << NoiseSuppressionLevelToString(noise_suppression.level) + << " }, transient_suppression: { enabled: " + << transient_suppression.enabled + << " }, gain_controller1: { enabled: " << gain_controller1.enabled + << ", mode: " << GainController1ModeToString(gain_controller1.mode) + << ", target_level_dbfs: " << gain_controller1.target_level_dbfs + << ", compression_gain_db: " << gain_controller1.compression_gain_db + << ", enable_limiter: " << gain_controller1.enable_limiter + << ", analog_gain_controller { enabled: " + << gain_controller1.analog_gain_controller.enabled + << ", startup_min_volume: " + << gain_controller1.analog_gain_controller.startup_min_volume + << ", clipped_level_min: " + << gain_controller1.analog_gain_controller.clipped_level_min + << ", enable_digital_adaptive: " + << gain_controller1.analog_gain_controller.enable_digital_adaptive + << ", clipped_level_step: " + << gain_controller1.analog_gain_controller.clipped_level_step + << ", clipped_ratio_threshold: " + << gain_controller1.analog_gain_controller.clipped_ratio_threshold + << ", clipped_wait_frames: " + << gain_controller1.analog_gain_controller.clipped_wait_frames + << ", clipping_predictor: { enabled: " + << gain_controller1.analog_gain_controller.clipping_predictor.enabled + << ", mode: " + << gain_controller1.analog_gain_controller.clipping_predictor.mode + << ", window_length: " + << gain_controller1.analog_gain_controller.clipping_predictor + .window_length + << ", reference_window_length: " + << gain_controller1.analog_gain_controller.clipping_predictor + .reference_window_length + << ", reference_window_delay: " + << gain_controller1.analog_gain_controller.clipping_predictor + .reference_window_delay + << ", clipping_threshold: " + << gain_controller1.analog_gain_controller.clipping_predictor + .clipping_threshold + << ", crest_factor_margin: " + << gain_controller1.analog_gain_controller.clipping_predictor + .crest_factor_margin + << ", use_predicted_step: " + << gain_controller1.analog_gain_controller.clipping_predictor + .use_predicted_step + << " }}}, gain_controller2: { enabled: " << gain_controller2.enabled + << ", fixed_digital: { gain_db: " + << gain_controller2.fixed_digital.gain_db + << " }, adaptive_digital: { enabled: " + << gain_controller2.adaptive_digital.enabled + << ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db + << ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db + << ", initial_gain_db: " + << gain_controller2.adaptive_digital.initial_gain_db + << ", max_gain_change_db_per_second: " + << gain_controller2.adaptive_digital.max_gain_change_db_per_second + << ", max_output_noise_level_dbfs: " + << gain_controller2.adaptive_digital.max_output_noise_level_dbfs + << " }, input_volume_control : { enabled " + << gain_controller2.input_volume_controller.enabled << "}}"; + return builder.str(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_processing.h b/third_party/libwebrtc/modules/audio_processing/include/audio_processing.h new file mode 100644 index 0000000000..f613a38de1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/include/audio_processing.h @@ -0,0 +1,941 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
+#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#ifndef _USE_MATH_DEFINES
+#define _USE_MATH_DEFINES
+#endif
+
+#include <math.h>
+#include <stddef.h>  // size_t
+#include <stdio.h>   // FILE
+#include <string.h>
+
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "api/audio/echo_control.h"
+#include "api/scoped_refptr.h"
+#include "modules/audio_processing/include/audio_processing_statistics.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/ref_count.h"
+#include "rtc_base/system/file_wrapper.h"
+#include "rtc_base/system/rtc_export.h"
+
+namespace rtc {
+class TaskQueue;
+}  // namespace rtc
+
+namespace webrtc {
+
+class AecDump;
+class AudioBuffer;
+
+class StreamConfig;
+class ProcessingConfig;
+
+class EchoDetector;
+class CustomAudioAnalyzer;
+class CustomProcessing;
+
+// The Audio Processing Module (APM) provides a collection of voice processing
+// components designed for real-time communications software.
+//
+// APM operates on two audio streams on a frame-by-frame basis. Frames of the
+// primary stream, on which all processing is applied, are passed to
+// `ProcessStream()`. Frames of the reverse direction stream are passed to
+// `ProcessReverseStream()`. On the client-side, this will typically be the
+// near-end (capture) and far-end (render) streams, respectively. APM should be
+// placed in the signal chain as close to the audio hardware abstraction layer
+// (HAL) as possible.
+//
+// On the server-side, the reverse stream will normally not be used, with
+// processing occurring on each incoming stream.
+//
+// Component interfaces follow a similar pattern and are accessed through
+// corresponding getters in APM. All components are disabled at create-time,
+// with default settings that are recommended for most situations. New settings
+// can be applied without enabling a component. Enabling a component triggers
+// memory allocation and initialization to allow it to start processing the
+// streams.
+//
+// Thread safety is provided with the following assumptions to reduce locking
+// overhead:
+//   1. The stream getters and setters are called from the same thread as
+//      ProcessStream(). More precisely, stream functions are never called
+//      concurrently with ProcessStream().
+//   2. Parameter getters are never called concurrently with the corresponding
+//      setter.
+//
+// APM accepts only linear PCM audio data in chunks of ~10 ms (see
+// AudioProcessing::GetFrameSize() for details) and sample rates ranging from
+// 8000 Hz to 384000 Hz. The int16 interfaces use interleaved data, while the
+// float interfaces use deinterleaved data.
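+//
+// For example, at 48000 Hz a ~10 ms chunk is 48000 / 100 = 480 samples per
+// channel. A stereo int16 chunk is then one interleaved array of 960 samples
+// (L0 R0 L1 R1 ...), while the float interface takes one pointer per channel,
+// each addressing 480 samples. Illustrative sketch (the buffer names are not
+// part of this API):
+//
+//   int16_t interleaved[2 * 480];             // int16 interface layout.
+//   float left[480];                          // float interface layout:
+//   float right[480];                         // one buffer per channel,
+//   float* deinterleaved[2] = {left, right};  // passed as float**.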
+//
+// Usage example, omitting error checking:
+//
+// rtc::scoped_refptr<AudioProcessing> apm =
+//     AudioProcessingBuilder().Create();
+//
+// AudioProcessing::Config config;
+// config.echo_canceller.enabled = true;
+// config.echo_canceller.mobile_mode = false;
+//
+// config.gain_controller1.enabled = true;
+// config.gain_controller1.mode =
+//     AudioProcessing::Config::GainController1::kAdaptiveAnalog;
+// config.gain_controller1.analog_level_minimum = 0;
+// config.gain_controller1.analog_level_maximum = 255;
+//
+// config.gain_controller2.enabled = true;
+//
+// config.high_pass_filter.enabled = true;
+//
+// apm->ApplyConfig(config);
+//
+// // Start a voice call...
+//
+// // ... Render frame arrives bound for the audio HAL ...
+// apm->ProcessReverseStream(render_frame);
+//
+// // ... Capture frame arrives from the audio HAL ...
+// // Call required set_stream_ functions.
+// apm->set_stream_delay_ms(delay_ms);
+// apm->set_stream_analog_level(analog_level);
+//
+// apm->ProcessStream(capture_frame);
+//
+// // Call required stream_ functions.
+// analog_level = apm->recommended_stream_analog_level();
+// has_voice = apm->stream_has_voice();
+//
+// // Repeat render and capture processing for the duration of the call...
+// // Start a new call...
+// apm->Initialize();
+//
+// // Close the application...
+// apm.reset();
+//
+class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
+ public:
+  // The struct below constitutes the new parameter scheme for the audio
+  // processing. It is being introduced gradually and until it is fully
+  // introduced, it is prone to change.
+  // TODO(peah): Remove this comment once the new config scheme is fully rolled
+  // out.
+  //
+  // The parameters and behavior of the audio processing module are controlled
+  // by changing the default values in the AudioProcessing::Config struct.
+  // The config is applied by passing the struct to the ApplyConfig method.
+  //
+  // This config is intended to be used during setup, and to enable/disable
+  // top-level processing effects. Use during processing may cause undesired
+  // submodule resets, affecting the audio quality. Use the RuntimeSetting
+  // construct for runtime configuration.
+  struct RTC_EXPORT Config {
+    // Sets the properties of the audio processing pipeline.
+    struct RTC_EXPORT Pipeline {
+      // Ways to downmix a multi-channel track to mono.
+      enum class DownmixMethod {
+        kAverageChannels,  // Average across channels.
+        kUseFirstChannel   // Use the first channel.
+      };
+
+      // Maximum allowed processing rate used internally. May only be set to
+      // 32000 or 48000 and any differing values will be treated as 48000.
+      int maximum_internal_processing_rate = 48000;
+      // Allow multi-channel processing of render audio.
+      bool multi_channel_render = false;
+      // Allow multi-channel processing of capture audio when AEC3 is active
+      // or a custom AEC is injected.
+      bool multi_channel_capture = false;
+      // Indicates how to downmix multi-channel capture audio to mono (when
+      // needed).
+      DownmixMethod capture_downmix_method = DownmixMethod::kAverageChannels;
+    } pipeline;
+
+    // Enables the pre-amplifier. It amplifies the capture signal before any
+    // other processing is done.
+    // TODO(webrtc:5298): Deprecate and use the pre-gain functionality in
+    // capture_level_adjustment instead.
+    struct PreAmplifier {
+      bool enabled = false;
+      float fixed_gain_factor = 1.0f;
+    } pre_amplifier;
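+
+    // Example (illustrative sketch only; `apm` stands in for a created
+    // AudioProcessing instance and the 2.0f gain is an arbitrary value):
+    // applying a fixed capture pre-gain via the newer
+    // capture_level_adjustment fields below instead of the deprecated
+    // PreAmplifier.
+    //
+    //   AudioProcessing::Config config;
+    //   config.capture_level_adjustment.enabled = true;
+    //   config.capture_level_adjustment.pre_gain_factor = 2.0f;  // ~ +6 dB
+    //   apm->ApplyConfig(config);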
+
+    // Functionality for general level adjustment in the capture pipeline.
+    // This should not be used together with the legacy PreAmplifier
+    // functionality.
+    struct CaptureLevelAdjustment {
+      bool operator==(const CaptureLevelAdjustment& rhs) const;
+      bool operator!=(const CaptureLevelAdjustment& rhs) const {
+        return !(*this == rhs);
+      }
+      bool enabled = false;
+      // The `pre_gain_factor` scales the signal before any processing is done.
+      float pre_gain_factor = 1.0f;
+      // The `post_gain_factor` scales the signal after all processing is done.
+      float post_gain_factor = 1.0f;
+      struct AnalogMicGainEmulation {
+        bool operator==(const AnalogMicGainEmulation& rhs) const;
+        bool operator!=(const AnalogMicGainEmulation& rhs) const {
+          return !(*this == rhs);
+        }
+        bool enabled = false;
+        // Initial analog gain level to use for the emulated analog gain. Must
+        // be in the range [0...255].
+        int initial_level = 255;
+      } analog_mic_gain_emulation;
+    } capture_level_adjustment;
+
+    struct HighPassFilter {
+      bool enabled = false;
+      bool apply_in_full_band = true;
+    } high_pass_filter;
+
+    struct EchoCanceller {
+      bool enabled = false;
+      bool mobile_mode = false;
+      bool export_linear_aec_output = false;
+      // Enforce the highpass filter to be on (has no effect for the mobile
+      // mode).
+      bool enforce_high_pass_filtering = true;
+    } echo_canceller;
+
+    // Enables background noise suppression.
+    struct NoiseSuppression {
+      bool enabled = false;
+      enum Level { kLow, kModerate, kHigh, kVeryHigh };
+      Level level = kModerate;
+      bool analyze_linear_aec_output_when_available = false;
+    } noise_suppression;
+
+    // Enables transient suppression.
+    struct TransientSuppression {
+      bool enabled = false;
+    } transient_suppression;
+
+    // Enables automatic gain control (AGC) functionality.
+    // The automatic gain control (AGC) component brings the signal to an
+    // appropriate range. This is done by applying a digital gain directly and,
+    // in the analog mode, prescribing an analog gain to be applied at the
+    // audio HAL.
+    // Recommended to be enabled on the client-side.
+    struct RTC_EXPORT GainController1 {
+      bool operator==(const GainController1& rhs) const;
+      bool operator!=(const GainController1& rhs) const {
+        return !(*this == rhs);
+      }
+
+      bool enabled = false;
+      enum Mode {
+        // Adaptive mode intended for use if an analog volume control is
+        // available on the capture device. It will require the user to
+        // provide coupling between the OS mixer controls and AGC through the
+        // stream_analog_level() functions.
+        // It consists of an analog gain prescription for the audio device and
+        // a digital compression stage.
+        kAdaptiveAnalog,
+        // Adaptive mode intended for situations in which an analog volume
+        // control is unavailable. It operates in a similar fashion to the
+        // adaptive analog mode, but with scaling instead applied in the
+        // digital domain. As with the analog mode, it additionally uses a
+        // digital compression stage.
+        kAdaptiveDigital,
+        // Fixed mode which enables only the digital compression stage also
+        // used by the two adaptive modes.
+        // It is distinguished from the adaptive modes by considering only a
+        // short time-window of the input signal. It applies a fixed gain
+        // through most of the input level range, and compresses (gradually
+        // reduces gain with increasing level) the input signal at higher
+        // levels. This mode is preferred on embedded devices where the
+        // capture signal level is predictable, so that a known gain can be
+        // applied.
+        kFixedDigital
+      };
+      Mode mode = kAdaptiveAnalog;
+      // Sets the target peak level (or envelope) of the AGC in dBFs (decibels
+      // from digital full-scale). The convention is to use positive values.
+      // For instance, passing in a value of 3 corresponds to -3 dBFs, or a
+      // target level 3 dB below full-scale. Limited to [0, 31].
+      int target_level_dbfs = 3;
+      // Sets the maximum gain the digital compression stage may apply, in dB.
+      // A higher number corresponds to greater compression, while a value of
+      // 0 will leave the signal uncompressed. Limited to [0, 90].
+      // For updates after APM setup, use a RuntimeSetting instead.
+      int compression_gain_db = 9;
+      // When enabled, the compression stage will hard limit the signal to the
+      // target level. Otherwise, the signal will be compressed but not
+      // limited above the target level.
+      bool enable_limiter = true;
+
+      // Enables the analog gain controller functionality.
+      struct AnalogGainController {
+        bool enabled = true;
+        // TODO(bugs.webrtc.org/7494): Deprecated. Stop using and remove.
+        int startup_min_volume = 0;
+        // Lowest analog microphone level that will be applied in response to
+        // clipping.
+        int clipped_level_min = 70;
+        // If true, an adaptive digital gain is applied.
+        bool enable_digital_adaptive = true;
+        // Amount the microphone level is lowered with every clipping event.
+        // Limited to (0, 255].
+        int clipped_level_step = 15;
+        // Proportion of clipped samples required to declare a clipping event.
+        // Limited to (0.f, 1.f).
+        float clipped_ratio_threshold = 0.1f;
+        // Time in frames to wait after a clipping event before checking
+        // again. Limited to values higher than 0.
+        int clipped_wait_frames = 300;
+
+        // Enables clipping prediction functionality.
+        struct ClippingPredictor {
+          bool enabled = false;
+          enum Mode {
+            // Clipping event prediction mode with fixed step estimation.
+            kClippingEventPrediction,
+            // Clipped peak estimation mode with adaptive step estimation.
+            kAdaptiveStepClippingPeakPrediction,
+            // Clipped peak estimation mode with fixed step estimation.
+            kFixedStepClippingPeakPrediction,
+          };
+          Mode mode = kClippingEventPrediction;
+          // Number of frames in the sliding analysis window.
+          int window_length = 5;
+          // Number of frames in the sliding reference window.
+          int reference_window_length = 5;
+          // Reference window delay (unit: number of frames).
+          int reference_window_delay = 5;
+          // Clipping prediction threshold (dBFS).
+          float clipping_threshold = -1.0f;
+          // Crest factor drop threshold (dB).
+          float crest_factor_margin = 3.0f;
+          // If true, the recommended clipped level step is used to modify the
+          // analog gain. Otherwise, the predictor runs without affecting the
+          // analog gain.
+          bool use_predicted_step = true;
+        } clipping_predictor;
+      } analog_gain_controller;
+    } gain_controller1;
+
+    // Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
+    // replaces the AGC sub-module parametrized by `gain_controller1`.
+    // AGC2 brings the captured audio signal to the desired level by combining
+    // three different controllers (namely, input volume controller, adaptive
+    // digital controller and fixed digital controller) and a limiter.
+    // TODO(bugs.webrtc.org:7494): Name `GainController` when AGC1 removed.
+    struct RTC_EXPORT GainController2 {
+      bool operator==(const GainController2& rhs) const;
+      bool operator!=(const GainController2& rhs) const {
+        return !(*this == rhs);
+      }
+
+      // AGC2 must be created if and only if `enabled` is true.
+      bool enabled = false;
+
+      // Parameters for the input volume controller, which adjusts the input
+      // volume applied when the audio is captured (e.g., microphone volume on
+      // a soundcard, input volume on HAL).
+      struct InputVolumeController {
+        bool operator==(const InputVolumeController& rhs) const;
+        bool operator!=(const InputVolumeController& rhs) const {
+          return !(*this == rhs);
+        }
+        bool enabled = false;
+      } input_volume_controller;
+
+      // Parameters for the adaptive digital controller, which adjusts and
+      // applies a digital gain after echo cancellation and after noise
+      // suppression.
+      struct RTC_EXPORT AdaptiveDigital {
+        bool operator==(const AdaptiveDigital& rhs) const;
+        bool operator!=(const AdaptiveDigital& rhs) const {
+          return !(*this == rhs);
+        }
+        bool enabled = false;
+        float headroom_db = 6.0f;
+        float max_gain_db = 30.0f;
+        float initial_gain_db = 8.0f;
+        float max_gain_change_db_per_second = 3.0f;
+        float max_output_noise_level_dbfs = -50.0f;
+      } adaptive_digital;
+
+      // Parameters for the fixed digital controller, which applies a fixed
+      // digital gain after the adaptive digital controller and before the
+      // limiter.
+      struct FixedDigital {
+        // By setting `gain_db` to a value greater than zero, the limiter can
+        // be turned into a compressor that first applies a fixed gain.
+        float gain_db = 0.0f;
+      } fixed_digital;
+    } gain_controller2;
+
+    std::string ToString() const;
+  };
+
+  // Specifies the properties of a setting to be passed to AudioProcessing at
+  // runtime.
+  class RuntimeSetting {
+   public:
+    enum class Type {
+      kNotSpecified,
+      kCapturePreGain,
+      kCaptureCompressionGain,
+      kCaptureFixedPostGain,
+      kPlayoutVolumeChange,
+      kCustomRenderProcessingRuntimeSetting,
+      kPlayoutAudioDeviceChange,
+      kCapturePostGain,
+      kCaptureOutputUsed
+    };
+
+    // Play-out audio device properties.
+    struct PlayoutAudioDeviceInfo {
+      int id;          // Identifies the audio device.
+      int max_volume;  // Maximum play-out volume.
+    };
+
+    RuntimeSetting() : type_(Type::kNotSpecified), value_(0.0f) {}
+    ~RuntimeSetting() = default;
+
+    static RuntimeSetting CreateCapturePreGain(float gain) {
+      return {Type::kCapturePreGain, gain};
+    }
+
+    static RuntimeSetting CreateCapturePostGain(float gain) {
+      return {Type::kCapturePostGain, gain};
+    }
+
+    // Corresponds to Config::GainController1::compression_gain_db, but for
+    // runtime configuration.
+    static RuntimeSetting CreateCompressionGainDb(int gain_db) {
+      RTC_DCHECK_GE(gain_db, 0);
+      RTC_DCHECK_LE(gain_db, 90);
+      return {Type::kCaptureCompressionGain, static_cast<float>(gain_db)};
+    }
+
+    // Corresponds to Config::GainController2::fixed_digital::gain_db, but for
+    // runtime configuration.
+    static RuntimeSetting CreateCaptureFixedPostGain(float gain_db) {
+      RTC_DCHECK_GE(gain_db, 0.0f);
+      RTC_DCHECK_LE(gain_db, 90.0f);
+      return {Type::kCaptureFixedPostGain, gain_db};
+    }
+
+    // Creates a runtime setting to notify play-out (aka render) audio device
+    // changes.
+    static RuntimeSetting CreatePlayoutAudioDeviceChange(
+        PlayoutAudioDeviceInfo audio_device) {
+      return {Type::kPlayoutAudioDeviceChange, audio_device};
+    }
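+
+    // Example (illustrative sketch; `apm` is assumed to be a configured
+    // AudioProcessing instance): runtime settings are the way to tweak
+    // parameters mid-call without re-applying the whole config, e.g. a
+    // capture pre-gain of 1.5f:
+    //
+    //   apm->SetRuntimeSetting(
+    //       AudioProcessing::RuntimeSetting::CreateCapturePreGain(1.5f));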
+
+    // Creates a runtime setting to notify play-out (aka render) volume
+    // changes. `volume` is the unnormalized volume, the maximum of which
+    static RuntimeSetting CreatePlayoutVolumeChange(int volume) {
+      return {Type::kPlayoutVolumeChange, volume};
+    }
+
+    static RuntimeSetting CreateCustomRenderSetting(float payload) {
+      return {Type::kCustomRenderProcessingRuntimeSetting, payload};
+    }
+
+    static RuntimeSetting CreateCaptureOutputUsedSetting(
+        bool capture_output_used) {
+      return {Type::kCaptureOutputUsed, capture_output_used};
+    }
+
+    Type type() const { return type_; }
+    // Getters do not return a value but instead modify the argument to
+    // protect from implicit casting.
+    void GetFloat(float* value) const {
+      RTC_DCHECK(value);
+      *value = value_.float_value;
+    }
+    void GetInt(int* value) const {
+      RTC_DCHECK(value);
+      *value = value_.int_value;
+    }
+    void GetBool(bool* value) const {
+      RTC_DCHECK(value);
+      *value = value_.bool_value;
+    }
+    void GetPlayoutAudioDeviceInfo(PlayoutAudioDeviceInfo* value) const {
+      RTC_DCHECK(value);
+      *value = value_.playout_audio_device_info;
+    }
+
+   private:
+    RuntimeSetting(Type id, float value) : type_(id), value_(value) {}
+    RuntimeSetting(Type id, int value) : type_(id), value_(value) {}
+    RuntimeSetting(Type id, PlayoutAudioDeviceInfo value)
+        : type_(id), value_(value) {}
+    Type type_;
+    union U {
+      U() {}
+      U(int value) : int_value(value) {}
+      U(float value) : float_value(value) {}
+      U(PlayoutAudioDeviceInfo value) : playout_audio_device_info(value) {}
+      float float_value;
+      int int_value;
+      bool bool_value;
+      PlayoutAudioDeviceInfo playout_audio_device_info;
+    } value_;
+  };
+
+  ~AudioProcessing() override {}
+
+  // Initializes internal states, while retaining all user settings. This
+  // should be called before beginning to process a new audio stream. However,
+  // it is not necessary to call before processing the first stream after
+  // creation.
+  //
+  // It is also not necessary to call if the audio parameters (sample
+  // rate and number of channels) have changed. Passing updated parameters
+  // directly to `ProcessStream()` and `ProcessReverseStream()` is permissible.
+  // If the parameters are known at init-time though, they may be provided.
+  // TODO(webrtc:5298): Change to return void.
+  virtual int Initialize() = 0;
+
+  // The int16 interfaces require:
+  //   - only `NativeRate`s be used
+  //   - that the input, output and reverse rates must match
+  //   - that `processing_config.output_stream()` matches
+  //     `processing_config.input_stream()`.
+  //
+  // The float interfaces accept arbitrary rates and support differing input
+  // and output layouts, but the output must have either one channel or the
+  // same number of channels as the input.
+  virtual int Initialize(const ProcessingConfig& processing_config) = 0;
+
+  // TODO(peah): This method is a temporary solution used to take control
+  // over the parameters in the audio processing module and is likely to
+  // change.
+  virtual void ApplyConfig(const Config& config) = 0;
+
+  // TODO(ajm): Only intended for internal use. Make private and friend the
+  // necessary classes?
+  virtual int proc_sample_rate_hz() const = 0;
+  virtual int proc_split_sample_rate_hz() const = 0;
+  virtual size_t num_input_channels() const = 0;
+  virtual size_t num_proc_channels() const = 0;
+  virtual size_t num_output_channels() const = 0;
+  virtual size_t num_reverse_channels() const = 0;
+
+  // Set to true when the output of AudioProcessing will be muted or in some
+  // other way not used. Ideally, the captured audio would still be processed,
+  // but some components may change behavior based on this information.
+  // Default false. This method takes a lock. To achieve this in a lock-less
+  // manner the PostRuntimeSetting can instead be used.
+  virtual void set_output_will_be_muted(bool muted) = 0;
+
+  // Enqueues a runtime setting.
+  virtual void SetRuntimeSetting(RuntimeSetting setting) = 0;
+
+  // Enqueues a runtime setting. Returns a bool indicating whether the
+  // enqueueing was successful.
+  virtual bool PostRuntimeSetting(RuntimeSetting setting) = 0;
+
+  // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio
+  // as specified in `input_config` and `output_config`. `src` and `dest` may
+  // use the same memory, if desired.
+  virtual int ProcessStream(const int16_t* const src,
+                            const StreamConfig& input_config,
+                            const StreamConfig& output_config,
+                            int16_t* const dest) = 0;
+
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
+  // `src` points to a channel buffer, arranged according to `input_stream`.
+  // At output, the channels will be arranged according to `output_stream` in
+  // `dest`.
+  //
+  // The output must have one channel or as many channels as the input. `src`
+  // and `dest` may use the same memory, if desired.
+  virtual int ProcessStream(const float* const* src,
+                            const StreamConfig& input_config,
+                            const StreamConfig& output_config,
+                            float* const* dest) = 0;
+
+  // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio
+  // for the reverse direction audio stream as specified in `input_config`
+  // and `output_config`. `src` and `dest` may use the same memory, if
+  // desired.
+  virtual int ProcessReverseStream(const int16_t* const src,
+                                   const StreamConfig& input_config,
+                                   const StreamConfig& output_config,
+                                   int16_t* const dest) = 0;
+
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
+  // `data` points to a channel buffer, arranged according to
+  // `reverse_config`.
+  virtual int ProcessReverseStream(const float* const* src,
+                                   const StreamConfig& input_config,
+                                   const StreamConfig& output_config,
+                                   float* const* dest) = 0;
+
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element
+  // of `data` points to a channel buffer, arranged according to
+  // `reverse_config`.
+  virtual int AnalyzeReverseStream(const float* const* data,
+                                   const StreamConfig& reverse_config) = 0;
+
+  // Returns the most recently produced ~10 ms of the linear AEC output at a
+  // rate of 16 kHz. If there is more than one capture channel, a mono
+  // representation of the input is returned. Returns true/false to indicate
+  // whether an output returned.
+  virtual bool GetLinearAecOutput(
+      rtc::ArrayView<std::array<float, 160>> linear_output) const = 0;
+
+  // This must be called prior to ProcessStream() if and only if adaptive
+  // analog gain control is enabled, to pass the current analog level from
+  // the audio HAL. Must be within the range [0, 255].
+  virtual void set_stream_analog_level(int level) = 0;
+
+  // When an analog mode is set, this should be called after
+  // `set_stream_analog_level()` and `ProcessStream()` to obtain the
+  // recommended new analog level for the audio HAL. It is the user's
+  // responsibility to apply this level.
+  virtual int recommended_stream_analog_level() const = 0;
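+
+  // Example (illustrative sketch; `apm`, `src`, `dest`, the stream configs
+  // and `mic_level` are caller-provided): the per-capture-frame call order
+  // when the adaptive analog mode is enabled:
+  //
+  //   apm->set_stream_delay_ms(delay_ms);       // If echo processing is on.
+  //   apm->set_stream_analog_level(mic_level);  // Level reported by the HAL.
+  //   apm->ProcessStream(src, input_config, output_config, dest);
+  //   mic_level = apm->recommended_stream_analog_level();  // Apply to HAL.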
+
+  // This must be called if and only if echo processing is enabled.
+  //
+  // Sets the `delay` in ms between ProcessReverseStream() receiving a
+  // far-end frame and ProcessStream() receiving a near-end frame containing
+  // the corresponding echo. On the client-side this can be expressed as
+  //   delay = (t_render - t_analyze) + (t_process - t_capture)
+  // where,
+  //   - t_analyze is the time a frame is passed to ProcessReverseStream()
+  //     and t_render is the time the first sample of the same frame is
+  //     rendered by the audio hardware.
+  //   - t_capture is the time the first sample of a frame is captured by the
+  //     audio hardware and t_process is the time the same frame is passed to
+  //     ProcessStream().
+  virtual int set_stream_delay_ms(int delay) = 0;
+  virtual int stream_delay_ms() const = 0;
+
+  // Call to signal that a key press occurred (true) or did not occur (false)
+  // with this chunk of audio.
+  virtual void set_stream_key_pressed(bool key_pressed) = 0;
+
+  // Creates and attaches a webrtc::AecDump for recording debugging
+  // information.
+  // The `worker_queue` may not be null and must outlive the created
+  // AecDump instance. |max_log_size_bytes == -1| means the log size
+  // will be unlimited. `handle` may not be null. The AecDump takes
+  // responsibility for `handle` and closes it in the destructor. A
+  // return value of true indicates that the file has been
+  // successfully opened, while a value of false indicates that
+  // opening the file failed.
+  virtual bool CreateAndAttachAecDump(absl::string_view file_name,
+                                      int64_t max_log_size_bytes,
+                                      rtc::TaskQueue* worker_queue) = 0;
+  virtual bool CreateAndAttachAecDump(FILE* handle,
+                                      int64_t max_log_size_bytes,
+                                      rtc::TaskQueue* worker_queue) = 0;
+
+  // TODO(webrtc:5298) Deprecated variant.
+  // Attaches provided webrtc::AecDump for recording debugging
+  // information. Log file and maximum file size logic is supposed to
+  // be handled by implementing instance of AecDump. Calling this
+  // method when another AecDump is attached resets the active AecDump
+  // with a new one. This causes the d-tor of the earlier AecDump to
+  // be called. The d-tor call may block until all pending logging
+  // tasks are completed.
+  virtual void AttachAecDump(std::unique_ptr<AecDump> aec_dump) = 0;
+
+  // If no AecDump is attached, this has no effect. If an AecDump is
+  // attached, its destructor is called. The d-tor may block until
+  // all pending logging tasks are completed.
+  virtual void DetachAecDump() = 0;
+
+  // Get audio processing statistics.
+  virtual AudioProcessingStats GetStatistics() = 0;
+  // TODO(webrtc:5298) Deprecated variant. The `has_remote_tracks` argument
+  // should be set if there are active remote tracks (this would usually be
+  // true during a call). If there are no remote tracks some of the stats
+  // will not be set by AudioProcessing, because they only make sense if
+  // there is at least one remote track.
+  virtual AudioProcessingStats GetStatistics(bool has_remote_tracks) = 0;
+
+  // Returns the last applied configuration.
+  virtual AudioProcessing::Config GetConfig() const = 0;
+
+  enum Error {
+    // Fatal errors.
+    kNoError = 0,
+    kUnspecifiedError = -1,
+    kCreationFailedError = -2,
+    kUnsupportedComponentError = -3,
+    kUnsupportedFunctionError = -4,
+    kNullPointerError = -5,
+    kBadParameterError = -6,
+    kBadSampleRateError = -7,
+    kBadDataLengthError = -8,
+    kBadNumberChannelsError = -9,
+    kFileError = -10,
+    kStreamParameterNotSetError = -11,
+    kNotEnabledError = -12,
+
+    // Warnings are non-fatal.
+    // This results when a set_stream_ parameter is out of range. Processing
+    // will continue, but the parameter may have been truncated.
+    kBadStreamParameterWarning = -13
+  };
+
+  // Native rates supported by the integer interfaces.
+  enum NativeRate {
+    kSampleRate8kHz = 8000,
+    kSampleRate16kHz = 16000,
+    kSampleRate32kHz = 32000,
+    kSampleRate48kHz = 48000
+  };
+
+  // TODO(kwiberg): We currently need to support a compiler (Visual C++) that
+  // complains if we don't explicitly state the size of the array here. Remove
+  // the size when that's no longer the case.
+  static constexpr int kNativeSampleRatesHz[4] = {
+      kSampleRate8kHz, kSampleRate16kHz, kSampleRate32kHz, kSampleRate48kHz};
+  static constexpr size_t kNumNativeSampleRates =
+      arraysize(kNativeSampleRatesHz);
+  static constexpr int kMaxNativeSampleRateHz =
+      kNativeSampleRatesHz[kNumNativeSampleRates - 1];
+
+  // APM processes audio in chunks of about 10 ms. See GetFrameSize() for
+  // details.
+  static constexpr int kChunkSizeMs = 10;
+
+  // Returns floor(sample_rate_hz/100): the number of samples per channel used
+  // as input and output to the audio processing module in calls to
+  // ProcessStream, ProcessReverseStream, AnalyzeReverseStream, and
+  // GetLinearAecOutput.
+  //
+  // This is exactly 10 ms for sample rates divisible by 100. For example:
+  //  - 48000 Hz (480 samples per channel),
+  //  - 44100 Hz (441 samples per channel),
+  //  - 16000 Hz (160 samples per channel).
+  //
+  // Sample rates not divisible by 100 are received/produced in frames of
+  // approximately 10 ms. For example:
+  //  - 22050 Hz (220 samples per channel, or ~9.98 ms per frame),
+  //  - 11025 Hz (110 samples per channel, or ~9.98 ms per frame).
+  // These nondivisible sample rates yield lower audio quality compared to
+  // multiples of 100. Internal resampling to 10 ms frames causes a simulated
+  // clock drift effect which impacts the performance of (for example) echo
+  // cancellation.
+  static int GetFrameSize(int sample_rate_hz) { return sample_rate_hz / 100; }
+};
+
+class RTC_EXPORT AudioProcessingBuilder {
+ public:
+  AudioProcessingBuilder();
+  AudioProcessingBuilder(const AudioProcessingBuilder&) = delete;
+  AudioProcessingBuilder& operator=(const AudioProcessingBuilder&) = delete;
+  ~AudioProcessingBuilder();
+
+  // Sets the APM configuration.
+  AudioProcessingBuilder& SetConfig(const AudioProcessing::Config& config) {
+    config_ = config;
+    return *this;
+  }
+
+  // Sets the echo controller factory to inject when APM is created.
+  AudioProcessingBuilder& SetEchoControlFactory(
+      std::unique_ptr<EchoControlFactory> echo_control_factory) {
+    echo_control_factory_ = std::move(echo_control_factory);
+    return *this;
+  }
+
+  // Sets the capture post-processing sub-module to inject when APM is
+  // created.
+  AudioProcessingBuilder& SetCapturePostProcessing(
+      std::unique_ptr<CustomProcessing> capture_post_processing) {
+    capture_post_processing_ = std::move(capture_post_processing);
+    return *this;
+  }
+
+  // Sets the render pre-processing sub-module to inject when APM is created.
+  AudioProcessingBuilder& SetRenderPreProcessing(
+      std::unique_ptr<CustomProcessing> render_pre_processing) {
+    render_pre_processing_ = std::move(render_pre_processing);
+    return *this;
+  }
+
+  // Sets the echo detector to inject when APM is created.
+  AudioProcessingBuilder& SetEchoDetector(
+      rtc::scoped_refptr<EchoDetector> echo_detector) {
+    echo_detector_ = std::move(echo_detector);
+    return *this;
+  }
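+
+  // Example (illustrative sketch; MyEchoDetector stands in for a
+  // caller-provided EchoDetector implementation): injecting a custom
+  // component through the builder.
+  //
+  //   rtc::scoped_refptr<AudioProcessing> apm =
+  //       AudioProcessingBuilder()
+  //           .SetConfig(config)
+  //           .SetEchoDetector(rtc::make_ref_counted<MyEchoDetector>())
+  //           .Create();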
+
+  // Sets the capture analyzer sub-module to inject when APM is created.
+  AudioProcessingBuilder& SetCaptureAnalyzer(
+      std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
+    capture_analyzer_ = std::move(capture_analyzer);
+    return *this;
+  }
+
+  // Creates an APM instance with the specified config or the default one if
+  // unspecified. Injects the specified components transferring the ownership
+  // to the newly created APM instance - i.e., except for the config, the
+  // builder is reset to its initial state.
+  rtc::scoped_refptr<AudioProcessing> Create();
+
+ private:
+  AudioProcessing::Config config_;
+  std::unique_ptr<EchoControlFactory> echo_control_factory_;
+  std::unique_ptr<CustomProcessing> capture_post_processing_;
+  std::unique_ptr<CustomProcessing> render_pre_processing_;
+  rtc::scoped_refptr<EchoDetector> echo_detector_;
+  std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
+};
+
+class StreamConfig {
+ public:
+  // sample_rate_hz: The sampling rate of the stream.
+  // num_channels: The number of audio channels in the stream.
+  StreamConfig(int sample_rate_hz = 0, size_t num_channels = 0)
+      : sample_rate_hz_(sample_rate_hz),
+        num_channels_(num_channels),
+        num_frames_(calculate_frames(sample_rate_hz)) {}
+
+  void set_sample_rate_hz(int value) {
+    sample_rate_hz_ = value;
+    num_frames_ = calculate_frames(value);
+  }
+  void set_num_channels(size_t value) { num_channels_ = value; }
+
+  int sample_rate_hz() const { return sample_rate_hz_; }
+
+  // The number of channels in the stream.
+  size_t num_channels() const { return num_channels_; }
+
+  size_t num_frames() const { return num_frames_; }
+  size_t num_samples() const { return num_channels_ * num_frames_; }
+
+  bool operator==(const StreamConfig& other) const {
+    return sample_rate_hz_ == other.sample_rate_hz_ &&
+           num_channels_ == other.num_channels_;
+  }
+
+  bool operator!=(const StreamConfig& other) const {
+    return !(*this == other);
+  }
+
+ private:
+  static size_t calculate_frames(int sample_rate_hz) {
+    return static_cast<size_t>(AudioProcessing::GetFrameSize(sample_rate_hz));
+  }
+
+  int sample_rate_hz_;
+  size_t num_channels_;
+  size_t num_frames_;
+};
+
+class ProcessingConfig {
+ public:
+  enum StreamName {
+    kInputStream,
+    kOutputStream,
+    kReverseInputStream,
+    kReverseOutputStream,
+    kNumStreamNames,
+  };
+
+  const StreamConfig& input_stream() const {
+    return streams[StreamName::kInputStream];
+  }
+  const StreamConfig& output_stream() const {
+    return streams[StreamName::kOutputStream];
+  }
+  const StreamConfig& reverse_input_stream() const {
+    return streams[StreamName::kReverseInputStream];
+  }
+  const StreamConfig& reverse_output_stream() const {
+    return streams[StreamName::kReverseOutputStream];
+  }
+
+  StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
+  StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
+  StreamConfig& reverse_input_stream() {
+    return streams[StreamName::kReverseInputStream];
+  }
+  StreamConfig& reverse_output_stream() {
+    return streams[StreamName::kReverseOutputStream];
+  }
+
+  bool operator==(const ProcessingConfig& other) const {
+    for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
+      if (this->streams[i] != other.streams[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  bool operator!=(const ProcessingConfig& other) const {
+    return !(*this == other);
+  }
+
+  StreamConfig streams[StreamName::kNumStreamNames];
+};
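+
+// Example (illustrative sketch; `apm`, `left` and `right` are caller-owned,
+// and each buffer must hold StreamConfig::num_frames() samples): driving the
+// deinterleaved float interface with explicit stream configs.
+//
+//   StreamConfig stereo_48k(/*sample_rate_hz=*/48000, /*num_channels=*/2);
+//   float* channels[2] = {left, right};
+//   apm->ProcessStream(channels, stereo_48k, stereo_48k, channels);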
+
+// Experimental interface for a custom analysis submodule.
+class CustomAudioAnalyzer {
+ public:
+  // (Re-) Initializes the submodule.
+  virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
+  // Analyzes the given capture or render signal.
+  virtual void Analyze(const AudioBuffer* audio) = 0;
+  // Returns a string representation of the module state.
+  virtual std::string ToString() const = 0;
+
+  virtual ~CustomAudioAnalyzer() {}
+};
+
+// Interface for a custom processing submodule.
+class CustomProcessing {
+ public:
+  // (Re-)Initializes the submodule.
+  virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
+  // Processes the given capture or render signal.
+  virtual void Process(AudioBuffer* audio) = 0;
+  // Returns a string representation of the module state.
+  virtual std::string ToString() const = 0;
+  // Handles RuntimeSettings. TODO(webrtc:9262): make pure virtual
+  // after updating dependencies.
+  virtual void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting);
+
+  virtual ~CustomProcessing() {}
+};
+
+// Interface for an echo detector submodule.
+class EchoDetector : public rtc::RefCountInterface {
+ public:
+  // (Re-)Initializes the submodule.
+  virtual void Initialize(int capture_sample_rate_hz,
+                          int num_capture_channels,
+                          int render_sample_rate_hz,
+                          int num_render_channels) = 0;
+
+  // Analysis (not changing) of the first channel of the render signal.
+  virtual void AnalyzeRenderAudio(
+      rtc::ArrayView<const float> render_audio) = 0;
+
+  // Analysis (not changing) of the capture signal.
+  virtual void AnalyzeCaptureAudio(
+      rtc::ArrayView<const float> capture_audio) = 0;
+
+  struct Metrics {
+    absl::optional<double> echo_likelihood;
+    absl::optional<double> echo_likelihood_recent_max;
+  };
+
+  // Collect current metrics from the echo detector.
+  virtual Metrics GetMetrics() const = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.cc b/third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.cc
new file mode 100644
index 0000000000..7139ee502e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.cc
@@ -0,0 +1,22 @@
+/*
+ *  Copyright 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/include/audio_processing_statistics.h"
+
+namespace webrtc {
+
+AudioProcessingStats::AudioProcessingStats() = default;
+
+AudioProcessingStats::AudioProcessingStats(const AudioProcessingStats& other) =
+    default;
+
+AudioProcessingStats::~AudioProcessingStats() = default;
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.h b/third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.h
new file mode 100644
index 0000000000..3b43319951
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/audio_processing_statistics.h
@@ -0,0 +1,67 @@
+/*
+ *  Copyright 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_STATISTICS_H_
+#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_STATISTICS_H_
+
+#include <stdint.h>
+
+#include "absl/types/optional.h"
+#include "rtc_base/system/rtc_export.h"
+
+namespace webrtc {
+// This version of the stats uses Optionals; it will replace the regular
+// AudioProcessingStatistics struct.
+struct RTC_EXPORT AudioProcessingStats {
+  AudioProcessingStats();
+  AudioProcessingStats(const AudioProcessingStats& other);
+  ~AudioProcessingStats();
+
+  // Deprecated.
+  // TODO(bugs.webrtc.org/11226): Remove.
+  // True if voice is detected in the last capture frame, after processing.
+  // It is conservative in flagging audio as speech, with low likelihood of
+  // incorrectly flagging a frame as voice.
+  // Only reported if voice detection is enabled in AudioProcessing::Config.
+  absl::optional<bool> voice_detected;
+
+  // AEC Statistics.
+  // ERL = 10log_10(P_far / P_echo)
+  absl::optional<double> echo_return_loss;
+  // ERLE = 10log_10(P_echo / P_out)
+  absl::optional<double> echo_return_loss_enhancement;
+  // Fraction of time that the AEC linear filter is divergent, in a 1-second
+  // non-overlapped aggregation window.
+  absl::optional<double> divergent_filter_fraction;
+
+  // The delay metrics consist of the delay median and standard deviation.
+  // They also include the fraction of delay estimates that can make the echo
+  // cancellation perform poorly. The values are aggregated until the first
+  // call to `GetStatistics()` and afterwards aggregated and updated every
+  // second. Note that if there are several clients pulling metrics from
+  // `GetStatistics()` during a session the first call from any of them will
+  // change to one second aggregation window for all.
+  absl::optional<int32_t> delay_median_ms;
+  absl::optional<int32_t> delay_standard_deviation_ms;
+
+  // Residual echo detector likelihood.
+  absl::optional<double> residual_echo_likelihood;
+  // Maximum residual echo likelihood from the last time period.
+  absl::optional<double> residual_echo_likelihood_recent_max;
+
+  // The instantaneous delay estimate produced in the AEC. The unit is in
+  // milliseconds and the value is the instantaneous value at the time of the
+  // call to `GetStatistics()`.
+  absl::optional<int32_t> delay_ms;
+};
+
+}  // namespace webrtc
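+
+// Example (illustrative sketch; `apm` is assumed to be an AudioProcessing
+// instance and RTC_LOG to be available via rtc_base/logging.h): every field
+// above is an absl::optional, so presence must be checked before use.
+//
+//   webrtc::AudioProcessingStats stats = apm->GetStatistics();
+//   if (stats.echo_return_loss_enhancement) {
+//     RTC_LOG(LS_INFO) << "ERLE: " << *stats.echo_return_loss_enhancement;
+//   }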
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_STATISTICS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/include/mock_audio_processing.h b/third_party/libwebrtc/modules/audio_processing/include/mock_audio_processing.h
new file mode 100644
index 0000000000..2ea1a865c3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/include/mock_audio_processing.h
@@ -0,0 +1,178 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_MOCK_AUDIO_PROCESSING_H_
+#define MODULES_AUDIO_PROCESSING_INCLUDE_MOCK_AUDIO_PROCESSING_H_
+
+#include <memory>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/include/aec_dump.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/include/audio_processing_statistics.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+namespace test {
+class MockCustomProcessing : public CustomProcessing {
+ public:
+  virtual ~MockCustomProcessing() {}
+  MOCK_METHOD(void,
+              Initialize,
+              (int sample_rate_hz, int num_channels),
+              (override));
+  MOCK_METHOD(void, Process, (AudioBuffer * audio), (override));
+  MOCK_METHOD(void,
+              SetRuntimeSetting,
+              (AudioProcessing::RuntimeSetting setting),
+              (override));
+  MOCK_METHOD(std::string, ToString, (), (const, override));
+};
+
+class MockCustomAudioAnalyzer : public CustomAudioAnalyzer {
+ public:
+  virtual ~MockCustomAudioAnalyzer() {}
+  MOCK_METHOD(void,
+              Initialize,
+              (int sample_rate_hz, int num_channels),
+              (override));
+  MOCK_METHOD(void, Analyze, (const AudioBuffer* audio), (override));
+  MOCK_METHOD(std::string, ToString, (), (const, override));
+};
+
+class MockEchoControl : public EchoControl {
+ public:
+  virtual ~MockEchoControl() {}
+  MOCK_METHOD(void, AnalyzeRender, (AudioBuffer * render), (override));
+  MOCK_METHOD(void, AnalyzeCapture, (AudioBuffer * capture), (override));
+  MOCK_METHOD(void,
+              ProcessCapture,
+              (AudioBuffer * capture, bool echo_path_change),
+              (override));
+  MOCK_METHOD(void,
+              ProcessCapture,
+              (AudioBuffer * capture,
+               AudioBuffer* linear_output,
+               bool echo_path_change),
+              (override));
+  MOCK_METHOD(Metrics, GetMetrics, (), (const, override));
+  MOCK_METHOD(void, SetAudioBufferDelay, (int delay_ms), (override));
+  MOCK_METHOD(bool, ActiveProcessing, (), (const, override));
+};
+
+class MockEchoDetector : public EchoDetector {
+ public:
+  virtual ~MockEchoDetector() {}
+  MOCK_METHOD(void,
+              Initialize,
+              (int capture_sample_rate_hz,
+               int num_capture_channels,
+               int render_sample_rate_hz,
+               int num_render_channels),
+              (override));
+  MOCK_METHOD(void,
+              AnalyzeRenderAudio,
+              (rtc::ArrayView<const float> render_audio),
+              (override));
+  MOCK_METHOD(void,
+              AnalyzeCaptureAudio,
+              (rtc::ArrayView<const float> capture_audio),
+              (override));
+  MOCK_METHOD(Metrics, GetMetrics, (), (const, override));
+};
+
+class MockAudioProcessing : public AudioProcessing {
+ public:
+  MockAudioProcessing() {}
+
+  virtual ~MockAudioProcessing() {}
+
+  MOCK_METHOD(int, Initialize, (), (override));
+  MOCK_METHOD(int,
+              Initialize,
+              (const ProcessingConfig& processing_config),
+              (override));
+  MOCK_METHOD(void, ApplyConfig, (const Config& config), (override));
+  MOCK_METHOD(int, proc_sample_rate_hz, (), (const, override));
+  MOCK_METHOD(int, proc_split_sample_rate_hz, (), (const, override));
+  MOCK_METHOD(size_t, num_input_channels, (), (const, override));
+  MOCK_METHOD(size_t, num_proc_channels, (), (const, override));
+  MOCK_METHOD(size_t, num_output_channels, (), (const, override));
+  MOCK_METHOD(size_t, num_reverse_channels, (), (const, override));
+  MOCK_METHOD(void, set_output_will_be_muted, (bool muted), (override));
+  MOCK_METHOD(void, SetRuntimeSetting, (RuntimeSetting setting), (override));
+  MOCK_METHOD(bool, PostRuntimeSetting, (RuntimeSetting setting), (override));
+  MOCK_METHOD(int,
+              ProcessStream,
+              (const int16_t* const src,
+               const StreamConfig& input_config,
+               const StreamConfig& output_config,
+               int16_t* const dest),
+              (override));
+  MOCK_METHOD(int,
+              ProcessStream,
+              (const float* const* src,
+               const StreamConfig& input_config,
+               const StreamConfig& output_config,
+               float* const* dest),
+              (override));
+  MOCK_METHOD(int,
+              ProcessReverseStream,
+              (const int16_t* const src,
+               const StreamConfig& input_config,
+               const StreamConfig& output_config,
+               int16_t* const dest),
+              (override));
+  MOCK_METHOD(int,
+              AnalyzeReverseStream,
+              (const float* const* data, const StreamConfig& reverse_config),
+              (override));
+  MOCK_METHOD(int,
+              ProcessReverseStream,
+              (const float* const* src,
+               const StreamConfig& input_config,
+               const StreamConfig& output_config,
+               float* const* dest),
+              (override));
+  MOCK_METHOD(bool,
+              GetLinearAecOutput,
+              ((rtc::ArrayView<std::array<float, 160>> linear_output)),
+              (const, override));
+  MOCK_METHOD(int, set_stream_delay_ms, (int delay), (override));
+  MOCK_METHOD(int, stream_delay_ms, (), (const, override));
+  MOCK_METHOD(void, set_stream_key_pressed, (bool key_pressed), (override));
+  MOCK_METHOD(void, set_stream_analog_level, (int), (override));
+  MOCK_METHOD(int, recommended_stream_analog_level, (), (const, override));
+  MOCK_METHOD(bool,
+              CreateAndAttachAecDump,
+              (absl::string_view file_name,
+               int64_t max_log_size_bytes,
+               rtc::TaskQueue* worker_queue),
+              (override));
+  MOCK_METHOD(bool,
+              CreateAndAttachAecDump,
+              (FILE * handle,
+               int64_t max_log_size_bytes,
+               rtc::TaskQueue* worker_queue),
+              (override));
+  MOCK_METHOD(void, AttachAecDump, (std::unique_ptr<AecDump>), (override));
+  MOCK_METHOD(void, DetachAecDump, (), (override));
+
+  MOCK_METHOD(AudioProcessingStats, GetStatistics, (), (override));
+  MOCK_METHOD(AudioProcessingStats, GetStatistics, (bool), (override));
+
+  MOCK_METHOD(AudioProcessing::Config, GetConfig, (), (const, override));
+};
+
+}  // namespace test
+}  // namespace webrtc
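+
+// Example (illustrative sketch; the test body and the 10.0 value are made up
+// for illustration): stubbing GetStatistics() on the mock in a gmock test.
+//
+//   webrtc::test::MockAudioProcessing apm;
+//   webrtc::AudioProcessingStats stats;
+//   stats.echo_return_loss = 10.0;
+//   EXPECT_CALL(apm, GetStatistics())
+//       .WillRepeatedly(::testing::Return(stats));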
+
+#endif  // MODULES_AUDIO_PROCESSING_INCLUDE_MOCK_AUDIO_PROCESSING_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc b/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc
new file mode 100644
index 0000000000..a15321ad48
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.cc
@@ -0,0 +1,100 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/strings/string_builder.h"
+
+// Check to verify that the define is properly set.
+#if !defined(WEBRTC_APM_DEBUG_DUMP) || \
+    (WEBRTC_APM_DEBUG_DUMP != 0 && WEBRTC_APM_DEBUG_DUMP != 1)
+#error "Set WEBRTC_APM_DEBUG_DUMP to either 0 or 1"
+#endif
+
+namespace webrtc {
+namespace {
+
+#if WEBRTC_APM_DEBUG_DUMP == 1
+
+#if defined(WEBRTC_WIN)
+constexpr char kPathDelimiter = '\\';
+#else
+constexpr char kPathDelimiter = '/';
+#endif
+
+std::string FormFileName(absl::string_view output_dir,
+                         absl::string_view name,
+                         int instance_index,
+                         int reinit_index,
+                         absl::string_view suffix) {
+#ifdef WEBRTC_WIN
+  char sep = '\\';
+#else
+  char sep = '/';
+#endif
+
+  std::stringstream ss;
+  std::string base = rtc::LogMessage::aec_debug_filename();
+  ss << base;
+
+  if (base.length() && base.back() != sep) {
+    ss << sep;
+  }
+
+  ss << name << "_" << instance_index << "-" << reinit_index << suffix;
+  return ss.str();
+}
+#endif
+
+}  // namespace
+
+#if WEBRTC_APM_DEBUG_DUMP == 1
+ApmDataDumper::ApmDataDumper(int instance_index)
+    : instance_index_(instance_index), debug_written_(0) {}
+#else
+ApmDataDumper::ApmDataDumper(int instance_index) {}
+#endif
+
+ApmDataDumper::~ApmDataDumper() = default;
+
+#if WEBRTC_APM_DEBUG_DUMP == 1
+bool ApmDataDumper::recording_activated_ = false;
+absl::optional<int> ApmDataDumper::dump_set_to_use_;
+char ApmDataDumper::output_dir_[] = "";
+
+FILE* ApmDataDumper::GetRawFile(absl::string_view name) {
+  std::string filename = FormFileName(output_dir_, name, instance_index_,
+                                      recording_set_index_, ".dat");
+  auto& f = raw_files_[filename];
+  if (!f) {
+    f.reset(fopen(filename.c_str(), "wb"));
+    RTC_CHECK(f.get()) << "Cannot write to " << filename << ".";
+  }
+  return f.get();
+}
+
+WavWriter* ApmDataDumper::GetWavFile(absl::string_view name,
+                                     int sample_rate_hz,
+                                     int num_channels,
+                                     WavFile::SampleFormat format) {
+  std::string filename = FormFileName(output_dir_, name, instance_index_,
+                                      recording_set_index_, ".wav");
+  auto& f = wav_files_[filename];
+  if (!f) {
+    f.reset(
+        new WavWriter(filename.c_str(), sample_rate_hz, num_channels, format));
+  }
+  return f.get();
+}
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.h b/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.h
new file mode 100644
index 0000000000..aa8496819b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/logging/apm_data_dumper.h
@@ -0,0 +1,452 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LOGGING_APM_DATA_DUMPER_H_
+#define MODULES_AUDIO_PROCESSING_LOGGING_APM_DATA_DUMPER_H_
+
+#include <stdint.h>
+#include <stdio.h>
+
+#if WEBRTC_APM_DEBUG_DUMP == 1
+#include <memory>
+#include <string>
+#include <unordered_map>
+#endif
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#if WEBRTC_APM_DEBUG_DUMP == 1
+#include "common_audio/wav_file.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/string_utils.h"
+#endif
+
+// Check to verify that the define is properly set.
+#if !defined(WEBRTC_APM_DEBUG_DUMP) || \
+    (WEBRTC_APM_DEBUG_DUMP != 0 && WEBRTC_APM_DEBUG_DUMP != 1)
+#error "Set WEBRTC_APM_DEBUG_DUMP to either 0 or 1"
+#endif
+
+namespace webrtc {
+
+#if WEBRTC_APM_DEBUG_DUMP == 1
+// Functor used as a custom deleter in the map of file pointers to raw files.
+struct RawFileCloseFunctor {
+  void operator()(FILE* f) const {
+    if (f) {
+      fclose(f);
+    }
+  }
+};
+#endif
+
+// Class that handles dumping of variables into files.
+class ApmDataDumper {
+ public:
+  // Constructor that takes an instance index that may
+  // be used to distinguish data dumped from different
+  // instances of the code.
+  explicit ApmDataDumper(int instance_index);
+
+  ApmDataDumper() = delete;
+  ApmDataDumper(const ApmDataDumper&) = delete;
+  ApmDataDumper& operator=(const ApmDataDumper&) = delete;
+
+  ~ApmDataDumper();
+
+  // Activates or deactivates the dumping functionality.
+  static void SetActivated(bool activated) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    recording_activated_ = activated;
+#endif
+  }
+
+  // Returns whether dumping functionality is enabled/available.
+  static bool IsAvailable() {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    return true;
+#else
+    return false;
+#endif
+  }
+
+  // Default dump set.
+  static constexpr size_t kDefaultDumpSet = 0;
+
+  // Specifies what dump set to use. All dump commands with a different dump
+  // set than the one specified will be discarded. If not specified, all dump
+  // sets will be used.
+  static void SetDumpSetToUse(int dump_set_to_use) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    dump_set_to_use_ = dump_set_to_use;
+#endif
+  }
+
+  // Set an optional output directory.
+  static void SetOutputDirectory(absl::string_view output_dir) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    RTC_CHECK_LT(output_dir.size(), kOutputDirMaxLength);
+    rtc::strcpyn(output_dir_, output_dir.size(), output_dir);
+#endif
+  }
+
+  // Reinitializes the data dumping such that new versions
+  // of all files being dumped to are created.
+  void InitiateNewSetOfRecordings() {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    ++recording_set_index_;
+    debug_written_ = 0;
+#endif
+  }
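+
+  // Example (illustrative sketch; "my_gain" and `gain` are made-up names, and
+  // the build must have WEBRTC_APM_DEBUG_DUMP=1): typical instrumentation
+  // inside an APM submodule. With instance index 0 and recording set 0, the
+  // dump lands in my_gain_0-0.dat per FormFileName() in the .cc file.
+  //
+  //   ApmDataDumper dumper(/*instance_index=*/0);
+  //   ApmDataDumper::SetActivated(true);
+  //   dumper.DumpRaw("my_gain", gain);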
+
+  // Methods for performing dumping of data of various types into
+  // various formats.
+  void DumpRaw(absl::string_view name,
+               double v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const double* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const double> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               float v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const float* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const float> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name, bool v, int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, static_cast<int16_t>(v));
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const bool* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        for (size_t k = 0; k < v_length; ++k) {
+          int16_t value = static_cast<int16_t>(v[k]);
+          fwrite(&value, sizeof(value), 1, file);
+        }
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const bool> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               int16_t v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const int16_t* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const int16_t> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               int32_t v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const int32_t* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(&v, sizeof(v), 1, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               size_t v_length,
+               const size_t* v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      FILE* file = GetRawFile(name);
+      if (file) {
+        fwrite(v, sizeof(v[0]), v_length, file);
+      }
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const int32_t> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpRaw(name, v.size(), v.data());
+    }
+#endif
+  }
+
+  void DumpRaw(absl::string_view name,
+               rtc::ArrayView<const size_t> v,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    DumpRaw(name, v.size(), v.data());
+#endif
+  }
+
+  void DumpWav(absl::string_view name,
+               size_t v_length,
+               const float* v,
+               int sample_rate_hz,
+               int num_channels,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      WavWriter* file = GetWavFile(name, sample_rate_hz, num_channels,
+                                   WavFile::SampleFormat::kFloat);
+      file->WriteSamples(v, v_length);
+      // Cheat and use aec_near as a stand-in for "size of the largest file"
+      // in the dump. We're looking to limit the total time, and that's a
+      // reasonable stand-in.
+      if (name == "aec_near") {
+        updateDebugWritten(v_length * sizeof(float));
+      }
+    }
+#endif
+  }
+
+  void DumpWav(absl::string_view name,
+               rtc::ArrayView<const float> v,
+               int sample_rate_hz,
+               int num_channels,
+               int dump_set = kDefaultDumpSet) {
+#if WEBRTC_APM_DEBUG_DUMP == 1
+    if (dump_set_to_use_ && *dump_set_to_use_ != dump_set)
+      return;
+
+    if (recording_activated_) {
+      DumpWav(name, v.size(), v.data(), sample_rate_hz, num_channels);
+    }
+#endif
+  }
+
+ private:
+#if WEBRTC_APM_DEBUG_DUMP == 1
+  static bool recording_activated_;
+  static absl::optional<int> dump_set_to_use_;
+  static constexpr size_t kOutputDirMaxLength = 1024;
+  static char output_dir_[kOutputDirMaxLength];
+  const int instance_index_;
+  int recording_set_index_ = 0;
+  std::unordered_map<std::string, std::unique_ptr<FILE, RawFileCloseFunctor>>
+      raw_files_;
+  std::unordered_map<std::string, std::unique_ptr<WavWriter>> wav_files_;
+
+  FILE* GetRawFile(absl::string_view name);
+  WavWriter* GetWavFile(absl::string_view name,
+                        int sample_rate_hz,
+                        int num_channels,
+                        WavFile::SampleFormat format);
+
+  uint32_t debug_written_ = 0;
+
+  void updateDebugWritten(uint32_t amount) {
+    debug_written_ += amount;
+    if (debug_written_ >= webrtc::Trace::aec_debug_size()) {
+      SetActivated(false);
+    }
+  }
+
+#endif
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_LOGGING_APM_DATA_DUMPER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/ns/BUILD.gn
new file mode 100644
index 0000000000..8c2e9dba84
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/BUILD.gn
@@ -0,0 +1,104 @@
+# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+
+rtc_static_library("ns") {
+  visibility = [ "*" ]
+  configs += [ "..:apm_debug_dump" ]
+  sources = [
+    "fast_math.cc",
+    "fast_math.h",
+    "histograms.cc",
+    "histograms.h",
+    "noise_estimator.cc",
+    "noise_estimator.h",
+    "noise_suppressor.cc",
+    "noise_suppressor.h",
+    "ns_common.h",
+    "ns_config.h",
+    "ns_fft.cc",
+    "ns_fft.h",
+    "prior_signal_model.cc",
+    "prior_signal_model.h",
+    "prior_signal_model_estimator.cc",
+    "prior_signal_model_estimator.h",
+    "quantile_noise_estimator.cc",
+    "quantile_noise_estimator.h",
+    "signal_model.cc",
+    "signal_model.h",
+    "signal_model_estimator.cc",
+    "signal_model_estimator.h",
+    "speech_probability_estimator.cc",
+    "speech_probability_estimator.h",
+    "suppression_params.cc",
+    "suppression_params.h",
+    "wiener_filter.cc",
+    "wiener_filter.h",
+  ]
+
+  defines = []
+  if (rtc_build_with_neon && target_cpu != "arm64") {
+    suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ]
+    cflags = [ "-mfpu=neon" ]
+  }
+
+  deps = [
+    "..:apm_logging",
+    "..:audio_buffer",
+    "..:high_pass_filter",
+    "../../../api:array_view",
+    "../../../common_audio:common_audio_c",
+    "../../../common_audio/third_party/ooura:fft_size_128",
+    "../../../common_audio/third_party/ooura:fft_size_256",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:safe_minmax",
+    "../../../rtc_base/system:arch",
+    "../../../system_wrappers",
+    "../../../system_wrappers:field_trial",
+    "../../../system_wrappers:metrics",
+    "../utility:cascaded_biquad_filter",
+  ]
+  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+}
+
+if (rtc_include_tests) {
+  rtc_source_set("ns_unittests") {
+    testonly = true
+
+    configs += [ "..:apm_debug_dump" ]
+    sources = [ "noise_suppressor_unittest.cc" ]
+
+    deps = [
+      ":ns",
+      "..:apm_logging",
+      "..:audio_buffer",
+      "..:audio_processing",
+      "..:high_pass_filter",
+      "../../../api:array_view",
+      "../../../rtc_base:checks",
+      "../../../rtc_base:safe_minmax",
+      "../../../rtc_base:stringutils",
+      "../../../rtc_base/system:arch",
+      "../../../system_wrappers",
+      "../../../test:test_support",
+      "../utility:cascaded_biquad_filter",
+    ]
+    absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+
+    defines = []
+
+    if (rtc_enable_protobuf) {
+      sources += []
+    }
+
+    if (!build_with_chromium) {
+      deps += [ "..:audio_processing_unittests" ]
+    }
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc b/third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc
new file mode 100644
index 0000000000..d13110c43f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/fast_math.h"
+
+#include <math.h>
+#include <stdint.h>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+float FastLog2f(float in) {
+  RTC_DCHECK_GT(in, .0f);
+  // Read and interpret float as uint32_t and then cast to float.
+  // This is done to extract the exponent (bits 30 - 23).
+  // "Right shift" of the exponent is then performed by multiplying
+  // with the constant (1/2^23). Finally, we subtract a constant to
+  // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias).
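+  // Worked example (illustrative): 8.0f has the bit pattern 0x41000000,
+  // i.e. biased exponent 130 with a zero mantissa. Reinterpreted as an
+  // integer and scaled by 1/2^23 this gives 130.0, and subtracting the bias
+  // constant yields 3.057, close to the exact log2(8) = 3. The constant
+  // 126.942695 (rather than exactly 127) is chosen to center the
+  // approximation error over the mantissa range.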
+  union {
+    float dummy;
+    uint32_t a;
+  } x = {in};
+  float out = x.a;
+  out *= 1.1920929e-7f;  // 1/2^23
+  out -= 126.942695f;    // Remove bias.
+  return out;
+}
+
+}  // namespace
+
+float SqrtFastApproximation(float f) {
+  // TODO(peah): Add fast approximate implementation.
+  return sqrtf(f);
+}
+
+float Pow2Approximation(float p) {
+  // TODO(peah): Add fast approximate implementation.
+  return powf(2.f, p);
+}
+
+float PowApproximation(float x, float p) {
+  return Pow2Approximation(p * FastLog2f(x));
+}
+
+float LogApproximation(float x) {
+  constexpr float kLogOf2 = 0.69314718056f;
+  return FastLog2f(x) * kLogOf2;
+}
+
+void LogApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y) {
+  for (size_t k = 0; k < x.size(); ++k) {
+    y[k] = LogApproximation(x[k]);
+  }
+}
+
+float ExpApproximation(float x) {
+  constexpr float kLog10Ofe = 0.4342944819f;
+  return PowApproximation(10.f, x * kLog10Ofe);
+}
+
+void ExpApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y) {
+  for (size_t k = 0; k < x.size(); ++k) {
+    y[k] = ExpApproximation(x[k]);
+  }
+}
+
+void ExpApproximationSignFlip(rtc::ArrayView<const float> x,
+                              rtc::ArrayView<float> y) {
+  for (size_t k = 0; k < x.size(); ++k) {
+    y[k] = ExpApproximation(-x[k]);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/fast_math.h b/third_party/libwebrtc/modules/audio_processing/ns/fast_math.h
new file mode 100644
index 0000000000..0aefee940b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/fast_math.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_
+#define MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Sqrt approximation.
+float SqrtFastApproximation(float f);
+
+// Log base conversion log(x) = log2(x)/log2(e).
+float LogApproximation(float x);
+void LogApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
+
+// 2^x approximation.
+float Pow2Approximation(float p);
+
+// x^p approximation.
+float PowApproximation(float x, float p);
+
+// e^x approximation.
+float ExpApproximation(float x);
+void ExpApproximation(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
+void ExpApproximationSignFlip(rtc::ArrayView<const float> x,
+                              rtc::ArrayView<float> y);
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/histograms.cc b/third_party/libwebrtc/modules/audio_processing/ns/histograms.cc
new file mode 100644
index 0000000000..1d4f4590d2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/histograms.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/histograms.h"
+
+namespace webrtc {
+
+Histograms::Histograms() {
+  Clear();
+}
+
+void Histograms::Clear() {
+  lrt_.fill(0);
+  spectral_flatness_.fill(0);
+  spectral_diff_.fill(0);
+}
+
+void Histograms::Update(const SignalModel& features_) {
+  // Update the histogram for the LRT.
+  constexpr float kOneByBinSizeLrt = 1.f / kBinSizeLrt;
+  if (features_.lrt < kHistogramSize * kBinSizeLrt && features_.lrt >= 0.f) {
+    ++lrt_[kOneByBinSizeLrt * features_.lrt];
+  }
+
+  // Update histogram for the spectral flatness.
+  constexpr float kOneByBinSizeSpecFlat = 1.f / kBinSizeSpecFlat;
+  if (features_.spectral_flatness < kHistogramSize * kBinSizeSpecFlat &&
+      features_.spectral_flatness >= 0.f) {
+    ++spectral_flatness_[features_.spectral_flatness * kOneByBinSizeSpecFlat];
+  }
+
+  // Update histogram for the spectral difference.
+  constexpr float kOneByBinSizeSpecDiff = 1.f / kBinSizeSpecDiff;
+  if (features_.spectral_diff < kHistogramSize * kBinSizeSpecDiff &&
+      features_.spectral_diff >= 0.f) {
+    ++spectral_diff_[features_.spectral_diff * kOneByBinSizeSpecDiff];
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/histograms.h b/third_party/libwebrtc/modules/audio_processing/ns/histograms.h
new file mode 100644
index 0000000000..9640e743cf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/histograms.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_
+#define MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_
+
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/ns/ns_common.h"
+#include "modules/audio_processing/ns/signal_model.h"
+
+namespace webrtc {
+
+constexpr int kHistogramSize = 1000;
+
+// Class for handling the updating of histograms.
+class Histograms {
+ public:
+  Histograms();
+  Histograms(const Histograms&) = delete;
+  Histograms& operator=(const Histograms&) = delete;
+
+  // Clears the histograms.
+  void Clear();
+
+  // Extracts thresholds for feature parameters and updates the corresponding
+  // histogram.
+  void Update(const SignalModel& features_);
+
+  // Methods for accessing the histograms.
+  rtc::ArrayView<const int, kHistogramSize> get_lrt() const { return lrt_; }
+  rtc::ArrayView<const int, kHistogramSize> get_spectral_flatness() const {
+    return spectral_flatness_;
+  }
+  rtc::ArrayView<const int, kHistogramSize> get_spectral_diff() const {
+    return spectral_diff_;
+  }
+
+ private:
+  std::array<int, kHistogramSize> lrt_;
+  std::array<int, kHistogramSize> spectral_flatness_;
+  std::array<int, kHistogramSize> spectral_diff_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc
new file mode 100644
index 0000000000..5367545f25
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/noise_estimator.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/ns/fast_math.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// Log(i).
+constexpr std::array<float, 129> log_table = {
+    0.f,       0.f,       0.f,       0.f,       0.f,       1.609438f, 1.791759f,
+    1.945910f, 2.079442f, 2.197225f, 2.302585f, 2.397895f, 2.484907f, 2.564949f,
+    2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f,
+    3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f,
+    3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f,
+    3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f,
+    3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f,
+    3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f,
+    4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f,
+    4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f,
+    4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f,
+    4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f,
+    4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f,
+    4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f,
+    4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f,
+    4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f,
+    4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f,
+    4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f,
+    4.836282f, 4.844187f, 4.852030f};
+
+}  // namespace
+
+NoiseEstimator::NoiseEstimator(const SuppressionParams& suppression_params)
+    : suppression_params_(suppression_params) {
+  noise_spectrum_.fill(0.f);
+  prev_noise_spectrum_.fill(0.f);
+  conservative_noise_spectrum_.fill(0.f);
+  parametric_noise_spectrum_.fill(0.f);
+}
+
+void NoiseEstimator::PrepareAnalysis() {
+  std::copy(noise_spectrum_.begin(), noise_spectrum_.end(),
+            prev_noise_spectrum_.begin());
+}
+
+void NoiseEstimator::PreUpdate(
+    int32_t num_analyzed_frames,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+    float signal_spectral_sum) {
+  quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_);
+
+  if (num_analyzed_frames < kShortStartupPhaseBlocks) {
+    // Compute simplified noise model during startup.
+    const size_t kStartBand = 5;
+    float sum_log_i_log_magn = 0.f;
+    float sum_log_i = 0.f;
+    float sum_log_i_square = 0.f;
+    float sum_log_magn = 0.f;
+    for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) {
+      float log_i = log_table[i];
+      sum_log_i += log_i;
+      sum_log_i_square += log_i * log_i;
+      float log_signal = LogApproximation(signal_spectrum[i]);
+      sum_log_magn += log_signal;
+      sum_log_i_log_magn += log_i * log_signal;
+    }
+
+    // Estimate the parameter for the level of the white noise.
+    constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1;
+    white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 *
+                          suppression_params_.over_subtraction_factor;
+
+    // Estimate pink noise parameters.
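+    // The fit below is a least-squares line through the log-magnitudes,
+    // log|S(i)| ~ a - b * log(i) (a sketch of the algebra implemented by
+    // the code that follows). With x = log(i), y = log|S(i)| and
+    // N = kFftSizeBy2Plus1 - kStartBand points:
+    //   denom = N * sum(x^2) - sum(x)^2
+    //   a = (sum(x^2) * sum(y) - sum(x) * sum(x*y)) / denom
+    //   b = (sum(x) * sum(y) - N * sum(x*y)) / denom
+    // a accumulates into pink_noise_numerator_ and b, clamped to [0, 1],
+    // into pink_noise_exp_.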
+    float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) -
+                  sum_log_i * sum_log_i;
+    float num =
+        sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn;
+    RTC_DCHECK_NE(denom, 0.f);
+    float pink_noise_adjustment = num / denom;
+
+    // Constrain the estimated spectrum to be positive.
+    pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f);
+    pink_noise_numerator_ += pink_noise_adjustment;
+    num = sum_log_i * sum_log_magn -
+          (kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn;
+    RTC_DCHECK_NE(denom, 0.f);
+    pink_noise_adjustment = num / denom;
+
+    // Constrain the pink noise power to be in the interval [0, 1].
+    pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f);
+
+    pink_noise_exp_ += pink_noise_adjustment;
+
+    const float one_by_num_analyzed_frames_plus_1 =
+        1.f / (num_analyzed_frames + 1.f);
+
+    // Calculate the frequency-independent parts of parametric noise estimate.
+    float parametric_exp = 0.f;
+    float parametric_num = 0.f;
+    if (pink_noise_exp_ > 0.f) {
+      // Use pink noise estimate.
+      parametric_num = ExpApproximation(pink_noise_numerator_ *
+                                        one_by_num_analyzed_frames_plus_1);
+      parametric_num *= num_analyzed_frames + 1.f;
+      parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1;
+    }
+
+    constexpr float kOneByShortStartupPhaseBlocks =
+        1.f / kShortStartupPhaseBlocks;
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      // Estimate the background noise using the white and pink noise
+      // parameters.
+      if (pink_noise_exp_ == 0.f) {
+        // Use white noise estimate.
+        parametric_noise_spectrum_[i] = white_noise_level_;
+      } else {
+        // Use pink noise estimate.
+        float use_band = i < kStartBand ? kStartBand : i;
+        float denom = PowApproximation(use_band, parametric_exp);
+        RTC_DCHECK_NE(denom, 0.f);
+        parametric_noise_spectrum_[i] = parametric_num / denom;
+      }
+    }
+
+    // Weight quantile noise with modeled noise.
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      noise_spectrum_[i] *= num_analyzed_frames;
+      float tmp = parametric_noise_spectrum_[i] *
+                  (kShortStartupPhaseBlocks - num_analyzed_frames);
+      noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1;
+      noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks;
+    }
+  }
+}
+
+void NoiseEstimator::PostUpdate(
+    rtc::ArrayView<const float> speech_probability,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
+  // Time-avg parameter for noise_spectrum update.
+  constexpr float kNoiseUpdate = 0.9f;
+
+  float gamma = kNoiseUpdate;
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    const float prob_speech = speech_probability[i];
+    const float prob_non_speech = 1.f - prob_speech;
+
+    // Temporary noise update used for speech frames if update value is less
+    // than previous.
+    float noise_update_tmp =
+        gamma * prev_noise_spectrum_[i] +
+        (1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
+                         prob_speech * prev_noise_spectrum_[i]);
+
+    // Time-constant based on speech/noise_spectrum state.
+    float gamma_old = gamma;
+
+    // Increase gamma for frames likely to be speech.
+    constexpr float kProbRange = .2f;
+    gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate;
+
+    // Conservative noise_spectrum update.
+    if (prob_speech < kProbRange) {
+      conservative_noise_spectrum_[i] +=
+          0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]);
+    }
+
+    // Noise_spectrum update.
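+    // Both branches below implement the same update form, with p the per-bin
+    // speech probability and S the current magnitude spectrum:
+    //   N[i] <- gamma * N_prev[i]
+    //           + (1 - gamma) * ((1 - p) * S[i] + p * N_prev[i]).
+    // When gamma was raised because speech is likely, the recomputed value is
+    // additionally capped by the temporary estimate above, so the noise
+    // estimate may always fall but rises only cautiously.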
+    if (gamma == gamma_old) {
+      noise_spectrum_[i] = noise_update_tmp;
+    } else {
+      noise_spectrum_[i] =
+          gamma * prev_noise_spectrum_[i] +
+          (1.f - gamma) * (prob_non_speech * signal_spectrum[i] +
+                           prob_speech * prev_noise_spectrum_[i]);
+      // Allow for noise_spectrum update downwards: If noise_spectrum update
+      // decreases the noise_spectrum, it is safe, so allow it to happen.
+      noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp);
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.h
new file mode 100644
index 0000000000..0c0466a679
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_
+
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/ns/ns_common.h"
+#include "modules/audio_processing/ns/quantile_noise_estimator.h"
+#include "modules/audio_processing/ns/suppression_params.h"
+
+namespace webrtc {
+
+// Class for estimating the spectral characteristics of the noise in an
+// incoming signal.
+class NoiseEstimator {
+ public:
+  explicit NoiseEstimator(const SuppressionParams& suppression_params);
+
+  // Prepare the estimator for analysis of a new frame.
+  void PrepareAnalysis();
+
+  // Performs the first step of the estimator update.
+  void PreUpdate(int32_t num_analyzed_frames,
+                 rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+                 float signal_spectral_sum);
+
+  // Performs the second step of the estimator update.
+  void PostUpdate(
+      rtc::ArrayView<const float> speech_probability,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum);
+
+  // Returns the noise spectral estimate.
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> get_noise_spectrum() const {
+    return noise_spectrum_;
+  }
+
+  // Returns the noise from the previous frame.
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> get_prev_noise_spectrum()
+      const {
+    return prev_noise_spectrum_;
+  }
+
+  // Returns a noise spectral estimate based on white and pink noise
+  // parameters.
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> get_parametric_noise_spectrum()
+      const {
+    return parametric_noise_spectrum_;
+  }
+  rtc::ArrayView<const float, kFftSizeBy2Plus1>
+  get_conservative_noise_spectrum() const {
+    return conservative_noise_spectrum_;
+  }
+
+ private:
+  const SuppressionParams& suppression_params_;
+  float white_noise_level_ = 0.f;
+  float pink_noise_numerator_ = 0.f;
+  float pink_noise_exp_ = 0.f;
+  std::array<float, kFftSizeBy2Plus1> prev_noise_spectrum_;
+  std::array<float, kFftSizeBy2Plus1> conservative_noise_spectrum_;
+  std::array<float, kFftSizeBy2Plus1> parametric_noise_spectrum_;
+  std::array<float, kFftSizeBy2Plus1> noise_spectrum_;
+  QuantileNoiseEstimator quantile_noise_estimator_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc
new file mode 100644
index 0000000000..d66faa6ed4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/noise_suppressor.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+#include "modules/audio_processing/ns/fast_math.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// Maps sample rate to number of bands.
+size_t NumBandsForRate(size_t sample_rate_hz) {
+  RTC_DCHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000 ||
+             sample_rate_hz == 48000);
+  return sample_rate_hz / 16000;
+}
+
+// Maximum number of channels for which the channel data is stored on
+// the stack. If the number of channels is larger than this, the channel data
+// is stored using scratch memory that is pre-allocated on the heap. The
+// reason for this partitioning is to avoid wasting heap space for handling
+// the more common numbers of channels, while at the same time not limiting
+// the support for higher numbers of channels by enforcing the channel data
+// to be stored on the stack using a fixed maximum value.
+constexpr size_t kMaxNumChannelsOnStack = 2;
+
+// Chooses the number of channels to store on the heap when that is required
+// due to the number of channels being larger than the pre-defined number
+// of channels to store on the stack.
+size_t NumChannelsOnHeap(size_t num_channels) {
+  return num_channels > kMaxNumChannelsOnStack ? num_channels : 0;
+}
+
+// Hybrid Hanning and flat window for the filterbank.
+constexpr std::array<float, 96> kBlocks160w256FirstHalf = {
+    0.00000000f, 0.01636173f, 0.03271908f, 0.04906767f, 0.06540313f,
+    0.08172107f, 0.09801714f, 0.11428696f, 0.13052619f, 0.14673047f,
+    0.16289547f, 0.17901686f, 0.19509032f, 0.21111155f, 0.22707626f,
+    0.24298018f, 0.25881905f, 0.27458862f, 0.29028468f, 0.30590302f,
+    0.32143947f, 0.33688985f, 0.35225005f, 0.36751594f, 0.38268343f,
+    0.39774847f, 0.41270703f, 0.42755509f, 0.44228869f, 0.45690388f,
+    0.47139674f, 0.48576339f, 0.50000000f, 0.51410274f, 0.52806785f,
+    0.54189158f, 0.55557023f, 0.56910015f, 0.58247770f, 0.59569930f,
+    0.60876143f, 0.62166057f, 0.63439328f, 0.64695615f, 0.65934582f,
+    0.67155895f, 0.68359230f, 0.69544264f, 0.70710678f, 0.71858162f,
+    0.72986407f, 0.74095113f, 0.75183981f, 0.76252720f, 0.77301045f,
+    0.78328675f, 0.79335334f, 0.80320753f, 0.81284668f, 0.82226822f,
+    0.83146961f, 0.84044840f, 0.84920218f, 0.85772861f, 0.86602540f,
+    0.87409034f, 0.88192126f, 0.88951608f, 0.89687274f, 0.90398929f,
+    0.91086382f, 0.91749450f, 0.92387953f, 0.93001722f, 0.93590593f,
+    0.94154407f, 0.94693013f, 0.95206268f, 0.95694034f, 0.96156180f,
+    0.96592583f, 0.97003125f, 0.97387698f, 0.97746197f, 0.98078528f,
+    0.98384601f, 0.98664333f, 0.98917651f, 0.99144486f, 0.99344778f,
+    0.99518473f, 0.99665524f, 0.99785892f, 0.99879546f, 0.99946459f,
+    0.99986614f};
+
+// Applies the filterbank window to a buffer.
+void ApplyFilterBankWindow(rtc::ArrayView<float, kFftSize> x) {
+  for (size_t i = 0; i < 96; ++i) {
+    x[i] = kBlocks160w256FirstHalf[i] * x[i];
+  }
+
+  for (size_t i = 161, k = 95; i < kFftSize; ++i, --k) {
+    RTC_DCHECK_NE(0, k);
+    x[i] = kBlocks160w256FirstHalf[k] * x[i];
+  }
+}
+
+// Extends a frame with previous data.
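+// As an illustration of the layout (with kNsFrameSize = 160 and
+// kFftSize = 256): the extended frame is
+//   [ 96 samples carried over from the previous call | 160 new samples ],
+// and the last 96 samples of the result are written back into old_data, so
+// consecutive frames overlap by 96 samples in the 256-point analysis.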
+void FormExtendedFrame(rtc::ArrayView<const float, kNsFrameSize> frame,
+                       rtc::ArrayView<float, kFftSize - kNsFrameSize> old_data,
+                       rtc::ArrayView<float, kFftSize> extended_frame) {
+  std::copy(old_data.begin(), old_data.end(), extended_frame.begin());
+  std::copy(frame.begin(), frame.end(),
+            extended_frame.begin() + old_data.size());
+  std::copy(extended_frame.end() - old_data.size(), extended_frame.end(),
+            old_data.begin());
+}
+
+// Uses overlap-and-add to produce an output frame.
+void OverlapAndAdd(rtc::ArrayView<const float, kFftSize> extended_frame,
+                   rtc::ArrayView<float, kOverlapSize> overlap_memory,
+                   rtc::ArrayView<float, kNsFrameSize> output_frame) {
+  for (size_t i = 0; i < kOverlapSize; ++i) {
+    output_frame[i] = overlap_memory[i] + extended_frame[i];
+  }
+  std::copy(extended_frame.begin() + kOverlapSize,
+            extended_frame.begin() + kNsFrameSize,
+            output_frame.begin() + kOverlapSize);
+  std::copy(extended_frame.begin() + kNsFrameSize, extended_frame.end(),
+            overlap_memory.begin());
+}
+
+// Produces a delayed frame.
+void DelaySignal(rtc::ArrayView<const float, kNsFrameSize> frame,
+                 rtc::ArrayView<float, kFftSize - kNsFrameSize> delay_buffer,
+                 rtc::ArrayView<float, kNsFrameSize> delayed_frame) {
+  constexpr size_t kSamplesFromFrame = kNsFrameSize - (kFftSize - kNsFrameSize);
+  std::copy(delay_buffer.begin(), delay_buffer.end(), delayed_frame.begin());
+  std::copy(frame.begin(), frame.begin() + kSamplesFromFrame,
+            delayed_frame.begin() + delay_buffer.size());
+
+  std::copy(frame.begin() + kSamplesFromFrame, frame.end(),
+            delay_buffer.begin());
+}
+
+// Computes the energy of an extended frame.
+float ComputeEnergyOfExtendedFrame(rtc::ArrayView<const float, kFftSize> x) {
+  float energy = 0.f;
+  for (float x_k : x) {
+    energy += x_k * x_k;
+  }
+
+  return energy;
+}
+
+// Computes the energy of an extended frame based on its subcomponents.
+float ComputeEnergyOfExtendedFrame(
+    rtc::ArrayView<const float, kNsFrameSize> frame,
+    rtc::ArrayView<const float, kFftSize - kNsFrameSize> old_data) {
+  float energy = 0.f;
+  for (float v : old_data) {
+    energy += v * v;
+  }
+  for (float v : frame) {
+    energy += v * v;
+  }
+
+  return energy;
+}
+
+// Computes the magnitude spectrum based on an FFT output.
+void ComputeMagnitudeSpectrum(
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> real,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> imag,
+    rtc::ArrayView<float, kFftSizeBy2Plus1> signal_spectrum) {
+  signal_spectrum[0] = fabsf(real[0]) + 1.f;
+  signal_spectrum[kFftSizeBy2Plus1 - 1] =
+      fabsf(real[kFftSizeBy2Plus1 - 1]) + 1.f;
+
+  for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) {
+    signal_spectrum[i] =
+        SqrtFastApproximation(real[i] * real[i] + imag[i] * imag[i]) + 1.f;
+  }
+}
+
+// Compute prior and post SNR.
+void ComputeSnr(rtc::ArrayView<const float, kFftSizeBy2Plus1> filter,
+                rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_signal_spectrum,
+                rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+                rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_noise_spectrum,
+                rtc::ArrayView<const float, kFftSizeBy2Plus1> noise_spectrum,
+                rtc::ArrayView<float, kFftSizeBy2Plus1> prior_snr,
+                rtc::ArrayView<float, kFftSizeBy2Plus1> post_snr) {
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    // Previous post SNR.
+    // Previous estimate: based on previous frame with gain filter.
+    float prev_estimate = prev_signal_spectrum[i] /
+                          (prev_noise_spectrum[i] + 0.0001f) * filter[i];
+    // Post SNR.
+    if (signal_spectrum[i] > noise_spectrum[i]) {
+      post_snr[i] = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f;
+    } else {
+      post_snr[i] = 0.f;
+    }
+    // The directed decision estimate of the prior SNR is a sum of the current
+    // and previous estimates.
+    prior_snr[i] = 0.98f * prev_estimate + (1.f - 0.98f) * post_snr[i];
+  }
+}
+
+// Computes the attenuating gain for the noise suppression of the upper bands.
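+// As a numeric illustration: for an average speech probability of 0.5 the
+// tanh term is tanh(0) = 0, giving a probability-based gain of
+// 0.5 * (1 + 0) = 0.5. That gain is blended with the average low-band filter
+// gain, weighted 25%/75% when speech is likely (probability >= 0.5) and
+// 50%/50% otherwise, and finally clamped to [minimum_attenuating_gain, 1].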
+float ComputeUpperBandsGain(
+    float minimum_attenuating_gain,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> filter,
+    rtc::ArrayView<const float> speech_probability,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_analysis_signal_spectrum,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
+  // Average speech prob and filter gain for the end of the lowest band.
+  constexpr int kNumAvgBins = 32;
+  constexpr float kOneByNumAvgBins = 1.f / kNumAvgBins;
+
+  float avg_prob_speech = 0.f;
+  float avg_filter_gain = 0.f;
+  for (size_t i = kFftSizeBy2Plus1 - kNumAvgBins - 1; i < kFftSizeBy2Plus1 - 1;
+       i++) {
+    avg_prob_speech += speech_probability[i];
+    avg_filter_gain += filter[i];
+  }
+  avg_prob_speech = avg_prob_speech * kOneByNumAvgBins;
+  avg_filter_gain = avg_filter_gain * kOneByNumAvgBins;
+
+  // If the speech was suppressed by a component between Analyze and Process,
+  // for example by an AEC, it should not be considered speech for the purpose
+  // of high band suppression. To that end, the speech probability is scaled
+  // accordingly.
+  float sum_analysis_spectrum = 0.f;
+  float sum_processing_spectrum = 0.f;
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    sum_analysis_spectrum += prev_analysis_signal_spectrum[i];
+    sum_processing_spectrum += signal_spectrum[i];
+  }
+
+  // The magnitude spectrum computation enforces the spectrum to be strictly
+  // positive.
+  RTC_DCHECK_GT(sum_analysis_spectrum, 0.f);
+  avg_prob_speech *= sum_processing_spectrum / sum_analysis_spectrum;
+
+  // Compute gain based on speech probability.
+  float gain =
+      0.5f * (1.f + static_cast<float>(tanh(2.f * avg_prob_speech - 1.f)));
+
+  // Combine gain with low band gain.
+  if (avg_prob_speech >= 0.5f) {
+    gain = 0.25f * gain + 0.75f * avg_filter_gain;
+  } else {
+    gain = 0.5f * gain + 0.5f * avg_filter_gain;
+  }
+
+  // Make sure gain is within flooring range.
+  return std::min(std::max(gain, minimum_attenuating_gain), 1.f);
+}
+
+}  // namespace
+
+NoiseSuppressor::ChannelState::ChannelState(
+    const SuppressionParams& suppression_params,
+    size_t num_bands)
+    : wiener_filter(suppression_params),
+      noise_estimator(suppression_params),
+      process_delay_memory(num_bands > 1 ? num_bands - 1 : 0) {
+  analyze_analysis_memory.fill(0.f);
+  prev_analysis_signal_spectrum.fill(1.f);
+  process_analysis_memory.fill(0.f);
+  process_synthesis_memory.fill(0.f);
+  for (auto& d : process_delay_memory) {
+    d.fill(0.f);
+  }
+}
+
+NoiseSuppressor::NoiseSuppressor(const NsConfig& config,
+                                 size_t sample_rate_hz,
+                                 size_t num_channels)
+    : num_bands_(NumBandsForRate(sample_rate_hz)),
+      num_channels_(num_channels),
+      suppression_params_(config.target_level),
+      filter_bank_states_heap_(NumChannelsOnHeap(num_channels_)),
+      upper_band_gains_heap_(NumChannelsOnHeap(num_channels_)),
+      energies_before_filtering_heap_(NumChannelsOnHeap(num_channels_)),
+      gain_adjustments_heap_(NumChannelsOnHeap(num_channels_)),
+      channels_(num_channels_) {
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    channels_[ch] =
+        std::make_unique<ChannelState>(suppression_params_, num_bands_);
+  }
+}
+
+void NoiseSuppressor::AggregateWienerFilters(
+    rtc::ArrayView<float, kFftSizeBy2Plus1> filter) const {
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> filter0 =
+      channels_[0]->wiener_filter.get_filter();
+  std::copy(filter0.begin(), filter0.end(), filter.begin());
+
+  for (size_t ch = 1; ch < num_channels_; ++ch) {
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> filter_ch =
+        channels_[ch]->wiener_filter.get_filter();
+
+    for (size_t k = 0; k < kFftSizeBy2Plus1; ++k) {
+      filter[k] = std::min(filter[k], filter_ch[k]);
+    }
+  }
+}
+
+void NoiseSuppressor::Analyze(const AudioBuffer& audio) {
+  // Prepare the noise estimator for the analysis stage.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    channels_[ch]->noise_estimator.PrepareAnalysis();
+  }
+
+  // Check for zero frames.
+  bool zero_frame = true;
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    rtc::ArrayView<const float, kNsFrameSize> y_band0(
+        &audio.split_bands_const(ch)[0][0], kNsFrameSize);
+    float energy = ComputeEnergyOfExtendedFrame(
+        y_band0, channels_[ch]->analyze_analysis_memory);
+    if (energy > 0.f) {
+      zero_frame = false;
+      break;
+    }
+  }
+
+  if (zero_frame) {
+    // We want to avoid updating statistics in this case:
+    // Updating feature statistics when we have zeros only will cause
+    // thresholds to move towards zero signal situations. This in turn has the
+    // effect that once the signal is "turned on" (non-zero values) everything
+    // will be treated as speech and there is no noise suppression effect.
+    // Depending on the duration of the inactive signal it takes a
+    // considerable amount of time for the system to learn what is noise and
+    // what is speech.
+    return;
+  }
+
+  // Only update analysis counter for frames that are properly analyzed.
+  if (++num_analyzed_frames_ < 0) {
+    num_analyzed_frames_ = 0;
+  }
+
+  // Analyze all channels.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    std::unique_ptr<ChannelState>& ch_p = channels_[ch];
+    rtc::ArrayView<const float, kNsFrameSize> y_band0(
+        &audio.split_bands_const(ch)[0][0], kNsFrameSize);
+
+    // Form an extended frame and apply analysis filter bank windowing.
+    std::array<float, kFftSize> extended_frame;
+    FormExtendedFrame(y_band0, ch_p->analyze_analysis_memory, extended_frame);
+    ApplyFilterBankWindow(extended_frame);
+
+    // Compute the magnitude spectrum.
+    std::array<float, kFftSizeBy2Plus1> real;
+    std::array<float, kFftSizeBy2Plus1> imag;
+    fft_.Fft(extended_frame, real, imag);
+
+    std::array<float, kFftSizeBy2Plus1> signal_spectrum;
+    ComputeMagnitudeSpectrum(real, imag, signal_spectrum);
+
+    // Compute energies.
+    float signal_energy = 0.f;
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      signal_energy += real[i] * real[i] + imag[i] * imag[i];
+    }
+    signal_energy /= kFftSizeBy2Plus1;
+
+    float signal_spectral_sum = 0.f;
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      signal_spectral_sum += signal_spectrum[i];
+    }
+
+    // Estimate the noise spectra and the probability estimates of speech
+    // presence.
+    ch_p->noise_estimator.PreUpdate(num_analyzed_frames_, signal_spectrum,
+                                    signal_spectral_sum);
+
+    std::array<float, kFftSizeBy2Plus1> post_snr;
+    std::array<float, kFftSizeBy2Plus1> prior_snr;
+    ComputeSnr(ch_p->wiener_filter.get_filter(),
+               ch_p->prev_analysis_signal_spectrum, signal_spectrum,
+               ch_p->noise_estimator.get_prev_noise_spectrum(),
+               ch_p->noise_estimator.get_noise_spectrum(), prior_snr, post_snr);
+
+    ch_p->speech_probability_estimator.Update(
+        num_analyzed_frames_, prior_snr, post_snr,
+        ch_p->noise_estimator.get_conservative_noise_spectrum(),
+        signal_spectrum, signal_spectral_sum, signal_energy);
+
+    ch_p->noise_estimator.PostUpdate(
+        ch_p->speech_probability_estimator.get_probability(), signal_spectrum);
+
+    // Store the magnitude spectrum to make it available for the process
+    // method.
+    std::copy(signal_spectrum.begin(), signal_spectrum.end(),
+              ch_p->prev_analysis_signal_spectrum.begin());
+  }
+}
+
+void NoiseSuppressor::Process(AudioBuffer* audio) {
+  // Select the space for storing data during the processing.
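+  // Illustration of the scratch-space selection below: with
+  // kMaxNumChannelsOnStack = 2, mono and stereo streams use the fixed-size
+  // stack arrays and NumChannelsOnHeap() returns 0, leaving the *_heap_
+  // vectors empty; for, say, a 6-channel stream the views are instead
+  // re-pointed at the heap vectors that were sized in the constructor.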
+  std::array<FilterBankState, kMaxNumChannelsOnStack> filter_bank_states_stack;
+  rtc::ArrayView<FilterBankState> filter_bank_states(
+      filter_bank_states_stack.data(), num_channels_);
+  std::array<float, kMaxNumChannelsOnStack> upper_band_gains_stack;
+  rtc::ArrayView<float> upper_band_gains(upper_band_gains_stack.data(),
+                                         num_channels_);
+  std::array<float, kMaxNumChannelsOnStack> energies_before_filtering_stack;
+  rtc::ArrayView<float> energies_before_filtering(
+      energies_before_filtering_stack.data(), num_channels_);
+  std::array<float, kMaxNumChannelsOnStack> gain_adjustments_stack;
+  rtc::ArrayView<float> gain_adjustments(gain_adjustments_stack.data(),
+                                         num_channels_);
+  if (NumChannelsOnHeap(num_channels_) > 0) {
+    // If the stack-allocated space is too small, use the heap for storing the
+    // data.
+    filter_bank_states = rtc::ArrayView<FilterBankState>(
+        filter_bank_states_heap_.data(), num_channels_);
+    upper_band_gains =
+        rtc::ArrayView<float>(upper_band_gains_heap_.data(), num_channels_);
+    energies_before_filtering = rtc::ArrayView<float>(
+        energies_before_filtering_heap_.data(), num_channels_);
+    gain_adjustments =
+        rtc::ArrayView<float>(gain_adjustments_heap_.data(), num_channels_);
+  }
+
+  // Compute the suppression filters for all channels.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    // Form an extended frame and apply analysis filter bank windowing.
+    rtc::ArrayView<float, kNsFrameSize> y_band0(&audio->split_bands(ch)[0][0],
+                                                kNsFrameSize);
+
+    FormExtendedFrame(y_band0, channels_[ch]->process_analysis_memory,
+                      filter_bank_states[ch].extended_frame);
+
+    ApplyFilterBankWindow(filter_bank_states[ch].extended_frame);
+
+    energies_before_filtering[ch] =
+        ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame);
+
+    // Perform filter bank analysis and compute the magnitude spectrum.
+    fft_.Fft(filter_bank_states[ch].extended_frame,
+             filter_bank_states[ch].real, filter_bank_states[ch].imag);
+
+    std::array<float, kFftSizeBy2Plus1> signal_spectrum;
+    ComputeMagnitudeSpectrum(filter_bank_states[ch].real,
+                             filter_bank_states[ch].imag, signal_spectrum);
+
+    // Compute the frequency domain gain filter for noise attenuation.
+    channels_[ch]->wiener_filter.Update(
+        num_analyzed_frames_,
+        channels_[ch]->noise_estimator.get_noise_spectrum(),
+        channels_[ch]->noise_estimator.get_prev_noise_spectrum(),
+        channels_[ch]->noise_estimator.get_parametric_noise_spectrum(),
+        signal_spectrum);
+
+    if (num_bands_ > 1) {
+      // Compute the time-domain gain for attenuating the noise in the upper
+      // bands.
+      upper_band_gains[ch] = ComputeUpperBandsGain(
+          suppression_params_.minimum_attenuating_gain,
+          channels_[ch]->wiener_filter.get_filter(),
+          channels_[ch]->speech_probability_estimator.get_probability(),
+          channels_[ch]->prev_analysis_signal_spectrum, signal_spectrum);
+    }
+  }
+
+  // Only do the below processing if the output of the audio processing module
+  // is used.
+  if (!capture_output_used_) {
+    return;
+  }
+
+  // Aggregate the Wiener filters for all channels.
+  std::array<float, kFftSizeBy2Plus1> filter_data;
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> filter = filter_data;
+  if (num_channels_ == 1) {
+    filter = channels_[0]->wiener_filter.get_filter();
+  } else {
+    AggregateWienerFilters(filter_data);
+  }
+
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    // Apply the filter to the lower band.
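+    // The Wiener gains are real-valued, so scaling the real and imaginary
+    // parts by the same factor attenuates each bin's magnitude while leaving
+    // its phase untouched.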
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      filter_bank_states[ch].real[i] *= filter[i];
+      filter_bank_states[ch].imag[i] *= filter[i];
+    }
+  }
+
+  // Perform filter bank synthesis.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    fft_.Ifft(filter_bank_states[ch].real, filter_bank_states[ch].imag,
+              filter_bank_states[ch].extended_frame);
+  }
+
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    const float energy_after_filtering =
+        ComputeEnergyOfExtendedFrame(filter_bank_states[ch].extended_frame);
+
+    // Apply synthesis window.
+    ApplyFilterBankWindow(filter_bank_states[ch].extended_frame);
+
+    // Compute the adjustment of the noise attenuation filter based on the
+    // effect of the attenuation.
+    gain_adjustments[ch] =
+        channels_[ch]->wiener_filter.ComputeOverallScalingFactor(
+            num_analyzed_frames_,
+            channels_[ch]->speech_probability_estimator.get_prior_probability(),
+            energies_before_filtering[ch], energy_after_filtering);
+  }
+
+  // Select and apply adjustment of the noise attenuation filter based on the
+  // effect of the attenuation.
+  float gain_adjustment = gain_adjustments[0];
+  for (size_t ch = 1; ch < num_channels_; ++ch) {
+    gain_adjustment = std::min(gain_adjustment, gain_adjustments[ch]);
+  }
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    for (size_t i = 0; i < kFftSize; ++i) {
+      filter_bank_states[ch].extended_frame[i] =
+          gain_adjustment * filter_bank_states[ch].extended_frame[i];
+    }
+  }
+
+  // Use overlap-and-add to form the output frame of the lowest band.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    rtc::ArrayView<float, kNsFrameSize> y_band0(&audio->split_bands(ch)[0][0],
+                                                kNsFrameSize);
+    OverlapAndAdd(filter_bank_states[ch].extended_frame,
+                  channels_[ch]->process_synthesis_memory, y_band0);
+  }
+
+  if (num_bands_ > 1) {
+    // Select the noise attenuating gain to apply to the upper band.
+    float upper_band_gain = upper_band_gains[0];
+    for (size_t ch = 1; ch < num_channels_; ++ch) {
+      upper_band_gain = std::min(upper_band_gain, upper_band_gains[ch]);
+    }
+
+    // Process the upper bands.
+    for (size_t ch = 0; ch < num_channels_; ++ch) {
+      for (size_t b = 1; b < num_bands_; ++b) {
+        // Delay the upper bands to match the delay of the filterbank applied
+        // to the lowest band.
+        rtc::ArrayView<float, kNsFrameSize> y_band(
+            &audio->split_bands(ch)[b][0], kNsFrameSize);
+        std::array<float, kNsFrameSize> delayed_frame;
+        DelaySignal(y_band, channels_[ch]->process_delay_memory[b - 1],
+                    delayed_frame);
+
+        // Apply the time-domain noise-attenuating gain.
+        for (size_t j = 0; j < kNsFrameSize; j++) {
+          y_band[j] = upper_band_gain * delayed_frame[j];
+        }
+      }
+    }
+  }
+
+  // Limit the output to the allowed range.
+  for (size_t ch = 0; ch < num_channels_; ++ch) {
+    for (size_t b = 0; b < num_bands_; ++b) {
+      rtc::ArrayView<float, kNsFrameSize> y_band(&audio->split_bands(ch)[b][0],
+                                                 kNsFrameSize);
+      for (size_t j = 0; j < kNsFrameSize; j++) {
+        y_band[j] = std::min(std::max(y_band[j], -32768.f), 32767.f);
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.h b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.h
new file mode 100644
index 0000000000..1e321cf4a2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
+#define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/ns/noise_estimator.h"
+#include "modules/audio_processing/ns/ns_common.h"
+#include "modules/audio_processing/ns/ns_config.h"
+#include "modules/audio_processing/ns/ns_fft.h"
+#include "modules/audio_processing/ns/speech_probability_estimator.h"
+#include "modules/audio_processing/ns/wiener_filter.h"
+
+namespace webrtc {
+
+// Class for suppressing noise in a signal.
+class NoiseSuppressor {
+ public:
+  NoiseSuppressor(const NsConfig& config,
+                  size_t sample_rate_hz,
+                  size_t num_channels);
+  NoiseSuppressor(const NoiseSuppressor&) = delete;
+  NoiseSuppressor& operator=(const NoiseSuppressor&) = delete;
+
+  // Analyses the signal (typically applied before the AEC to avoid analyzing
+  // any comfort noise signal).
+  void Analyze(const AudioBuffer& audio);
+
+  // Applies noise suppression.
+  void Process(AudioBuffer* audio);
+
+  // Specifies whether the capture output will be used. The purpose of this is
+  // to allow the noise suppressor to deactivate some of the processing when
+  // the resulting output is not used anyway, for instance when the endpoint
+  // is muted.
+  void SetCaptureOutputUsage(bool capture_output_used) {
+    capture_output_used_ = capture_output_used;
+  }
+
+ private:
+  const size_t num_bands_;
+  const size_t num_channels_;
+  const SuppressionParams suppression_params_;
+  int32_t num_analyzed_frames_ = -1;
+  NrFft fft_;
+  bool capture_output_used_ = true;
+
+  struct ChannelState {
+    ChannelState(const SuppressionParams& suppression_params,
+                 size_t num_bands);
+
+    SpeechProbabilityEstimator speech_probability_estimator;
+    WienerFilter wiener_filter;
+    NoiseEstimator noise_estimator;
+    std::array<float, kFftSizeBy2Plus1> prev_analysis_signal_spectrum;
+    std::array<float, kFftSize - kNsFrameSize> analyze_analysis_memory;
+    std::array<float, kOverlapSize> process_analysis_memory;
+    std::array<float, kOverlapSize> process_synthesis_memory;
+    std::vector<std::array<float, kOverlapSize>> process_delay_memory;
+  };
+
+  struct FilterBankState {
+    std::array<float, kFftSizeBy2Plus1> real;
+    std::array<float, kFftSizeBy2Plus1> imag;
+    std::array<float, kFftSize> extended_frame;
+  };
+
+  std::vector<FilterBankState> filter_bank_states_heap_;
+  std::vector<float> upper_band_gains_heap_;
+  std::vector<float> energies_before_filtering_heap_;
+  std::vector<float> gain_adjustments_heap_;
+  std::vector<std::unique_ptr<ChannelState>> channels_;
+
+  // Aggregates the Wiener filters into a single filter to use.
+  void AggregateWienerFilters(
+      rtc::ArrayView<float, kFftSizeBy2Plus1> filter) const;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor_unittest.cc
new file mode 100644
index 0000000000..28ea63ae40
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor_unittest.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/noise_suppressor.h"
+
+#include <algorithm>
+#include <array>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "rtc_base/strings/string_builder.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+std::string ProduceDebugText(int sample_rate_hz,
+                             size_t num_channels,
+                             NsConfig::SuppressionLevel level) {
+  rtc::StringBuilder ss;
+  ss << "Sample rate: " << sample_rate_hz << ", num_channels: " << num_channels
+     << ", level: " << static_cast<int>(level);
+  return ss.Release();
+}
+
+void PopulateInputFrameWithIdenticalChannels(size_t num_channels,
+                                             size_t num_bands,
+                                             size_t frame_index,
+                                             AudioBuffer* audio) {
+  for (size_t ch = 0; ch < num_channels; ++ch) {
+    for (size_t b = 0; b < num_bands; ++b) {
+      for (size_t i = 0; i < 160; ++i) {
+        float value = static_cast<float>(frame_index * 160 + i);
+        audio->split_bands(ch)[b][i] = (value > 0 ? 5000 * b + value : 0);
+      }
+    }
+  }
+}
+
+void VerifyIdenticalChannels(size_t num_channels,
+                             size_t num_bands,
+                             size_t frame_index,
+                             const AudioBuffer& audio) {
+  EXPECT_GT(num_channels, 1u);
+  for (size_t ch = 1; ch < num_channels; ++ch) {
+    for (size_t b = 0; b < num_bands; ++b) {
+      for (size_t i = 0; i < 160; ++i) {
+        EXPECT_EQ(audio.split_bands_const(ch)[b][i],
+                  audio.split_bands_const(0)[b][i]);
+      }
+    }
+  }
+}
+
+}  // namespace
+
+// Verifies that the same noise reduction effect is applied to all channels.
+TEST(NoiseSuppressor, IdenticalChannelEffects) {
+  for (auto rate : {16000, 32000, 48000}) {
+    for (auto num_channels : {1, 4, 8}) {
+      for (auto level :
+           {NsConfig::SuppressionLevel::k6dB, NsConfig::SuppressionLevel::k12dB,
+            NsConfig::SuppressionLevel::k18dB,
+            NsConfig::SuppressionLevel::k21dB}) {
+        SCOPED_TRACE(ProduceDebugText(rate, num_channels, level));
+
+        const size_t num_bands = rate / 16000;
+        // const int frame_length = rtc::CheckedDivExact(rate, 100);
+        AudioBuffer audio(rate, num_channels, rate, num_channels, rate,
+                          num_channels);
+        NsConfig cfg;
+        NoiseSuppressor ns(cfg, rate, num_channels);
+        for (size_t frame_index = 0; frame_index < 1000; ++frame_index) {
+          if (rate > 16000) {
+            audio.SplitIntoFrequencyBands();
+          }
+
+          PopulateInputFrameWithIdenticalChannels(num_channels, num_bands,
+                                                  frame_index, &audio);
+
+          ns.Analyze(audio);
+          ns.Process(&audio);
+          if (num_channels > 1) {
+            VerifyIdenticalChannels(num_channels, num_bands, frame_index,
+                                    audio);
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_common.h b/third_party/libwebrtc/modules/audio_processing/ns/ns_common.h
new file mode 100644
index 0000000000..d6149f72a7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_common.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_
+
+#include <cstddef>
+
+namespace webrtc {
+
+constexpr size_t kFftSize = 256;
+constexpr size_t kFftSizeBy2Plus1 = kFftSize / 2 + 1;
+constexpr size_t kNsFrameSize = 160;
+constexpr size_t kOverlapSize = kFftSize - kNsFrameSize;
+
+constexpr int kShortStartupPhaseBlocks = 50;
+constexpr int kLongStartupPhaseBlocks = 200;
+constexpr int kFeatureUpdateWindowSize = 500;
+
+constexpr float kLtrFeatureThr = 0.5f;
+constexpr float kBinSizeLrt = 0.1f;
+constexpr float kBinSizeSpecFlat = 0.05f;
+constexpr float kBinSizeSpecDiff = 0.1f;
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_config.h b/third_party/libwebrtc/modules/audio_processing/ns/ns_config.h
new file mode 100644
index 0000000000..0a285e9cea
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_config.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_
+#define MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_
+
+namespace webrtc {
+
+// Config struct for the noise suppressor.
+struct NsConfig {
+  enum class SuppressionLevel { k6dB, k12dB, k18dB, k21dB };
+  SuppressionLevel target_level = SuppressionLevel::k12dB;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_NS_CONFIG_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc b/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc
new file mode 100644
index 0000000000..264c46972c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
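+
+// Note on the packing convention assumed below (standard for the Ooura rdft
+// routines, stated here as an illustration rather than as upstream
+// documentation): WebRtc_rdft transforms kFftSize real samples in place and
+// stores the real spectrum with the DC component in element 0 and the
+// Nyquist-frequency component in element 1; the remaining bins follow as
+// interleaved (real, imaginary) pairs. This is why Fft() and Ifft() treat
+// time_data[0] and time_data[1] specially.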
+
+#include "modules/audio_processing/ns/ns_fft.h"
+
+#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
+
+namespace webrtc {
+
+NrFft::NrFft() : bit_reversal_state_(kFftSize / 2), tables_(kFftSize / 2) {
+  // Initialize WebRtc_rdft (setting bit_reversal_state_[0] to 0 triggers
+  // initialization).
+  bit_reversal_state_[0] = 0.f;
+  std::array<float, kFftSize> tmp_buffer;
+  tmp_buffer.fill(0.f);
+  WebRtc_rdft(kFftSize, 1, tmp_buffer.data(), bit_reversal_state_.data(),
+              tables_.data());
+}
+
+void NrFft::Fft(rtc::ArrayView<float, kFftSize> time_data,
+                rtc::ArrayView<float, kFftSizeBy2Plus1> real,
+                rtc::ArrayView<float, kFftSizeBy2Plus1> imag) {
+  WebRtc_rdft(kFftSize, 1, time_data.data(), bit_reversal_state_.data(),
+              tables_.data());
+
+  imag[0] = 0;
+  real[0] = time_data[0];
+
+  imag[kFftSizeBy2Plus1 - 1] = 0;
+  real[kFftSizeBy2Plus1 - 1] = time_data[1];
+
+  for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) {
+    real[i] = time_data[2 * i];
+    imag[i] = time_data[2 * i + 1];
+  }
+}
+
+void NrFft::Ifft(rtc::ArrayView<const float, kFftSizeBy2Plus1> real,
+                 rtc::ArrayView<const float, kFftSizeBy2Plus1> imag,
+                 rtc::ArrayView<float, kFftSize> time_data) {
+  time_data[0] = real[0];
+  time_data[1] = real[kFftSizeBy2Plus1 - 1];
+  for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) {
+    time_data[2 * i] = real[i];
+    time_data[2 * i + 1] = imag[i];
+  }
+  WebRtc_rdft(kFftSize, -1, time_data.data(), bit_reversal_state_.data(),
+              tables_.data());
+
+  // Scale the output.
+  constexpr float kScaling = 2.f / kFftSize;
+  for (float& d : time_data) {
+    d *= kScaling;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.h b/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.h
new file mode 100644
index 0000000000..539251eef2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
+#define MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/ns/ns_common.h"
+
+namespace webrtc {
+
+// Wrapper class providing 256 point FFT functionality.
+class NrFft {
+ public:
+  NrFft();
+  NrFft(const NrFft&) = delete;
+  NrFft& operator=(const NrFft&) = delete;
+
+  // Transforms the signal from time to frequency domain.
+  void Fft(rtc::ArrayView<float, kFftSize> time_data,
+           rtc::ArrayView<float, kFftSizeBy2Plus1> real,
+           rtc::ArrayView<float, kFftSizeBy2Plus1> imag);
+
+  // Transforms the signal from frequency to time domain.
+  void Ifft(rtc::ArrayView<const float, kFftSizeBy2Plus1> real,
+            rtc::ArrayView<const float, kFftSizeBy2Plus1> imag,
+            rtc::ArrayView<float, kFftSize> time_data);
+
+ private:
+  std::vector<size_t> bit_reversal_state_;
+  std::vector<float> tables_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/ns_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/ns/ns_gn/moz.build
new file mode 100644
index 0000000000..12076eedcb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/ns_gn/moz.build
@@ -0,0 +1,245 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/ns/fast_math.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/histograms.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/noise_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/noise_suppressor.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/ns_fft.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc", + "/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + 
DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("ns_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc new file mode 100644 index 0000000000..f25a1e2060 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/prior_signal_model.h"
+
+namespace webrtc {
+
+PriorSignalModel::PriorSignalModel(float lrt_initial_value)
+    : lrt(lrt_initial_value) {}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.h b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.h
new file mode 100644
index 0000000000..dcfa7ea709
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_
+#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_
+
+namespace webrtc {
+
+// Struct for storing the prior signal model parameters.
+struct PriorSignalModel {
+  explicit PriorSignalModel(float lrt_initial_value);
+  PriorSignalModel(const PriorSignalModel&) = delete;
+  PriorSignalModel& operator=(const PriorSignalModel&) = delete;
+
+  float lrt;
+  float flatness_threshold = .5f;
+  float template_diff_threshold = .5f;
+  float lrt_weighting = 1.f;
+  float flatness_weighting = 0.f;
+  float difference_weighting = 0.f;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc
new file mode 100644
index 0000000000..c814658e57
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.cc
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/prior_signal_model_estimator.h"
+
+#include <math.h>
+#include <algorithm>
+
+#include "modules/audio_processing/ns/fast_math.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// Identifies the first of the two largest peaks in the histogram.
+void FindFirstOfTwoLargestPeaks(
+    float bin_size,
+    rtc::ArrayView<const int, kHistogramSize> spectral_flatness,
+    float* peak_position,
+    int* peak_weight) {
+  RTC_DCHECK(peak_position);
+  RTC_DCHECK(peak_weight);
+
+  int peak_value = 0;
+  int secondary_peak_value = 0;
+  *peak_position = 0.f;
+  float secondary_peak_position = 0.f;
+  *peak_weight = 0;
+  int secondary_peak_weight = 0;
+
+  // Identify the two largest peaks.
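+  // Worked example (hypothetical counts): with bin_size = 0.1 and histogram
+  // values {..., 40 at bin 3, 25 at bin 4, ...}, the candidates end up at the
+  // bin mids 0.35 and 0.45. Since they lie within 2 * bin_size of each other
+  // and the secondary weight 25 exceeds 0.5 * 40, the merge step below
+  // combines them into a single peak at position 0.40 with weight 65.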
+  for (int i = 0; i < kHistogramSize; ++i) {
+    const float bin_mid = (i + 0.5f) * bin_size;
+    if (spectral_flatness[i] > peak_value) {
+      // Found new "first" peak candidate.
+      secondary_peak_value = peak_value;
+      secondary_peak_weight = *peak_weight;
+      secondary_peak_position = *peak_position;
+
+      peak_value = spectral_flatness[i];
+      *peak_weight = spectral_flatness[i];
+      *peak_position = bin_mid;
+    } else if (spectral_flatness[i] > secondary_peak_value) {
+      // Found new "second" peak candidate.
+      secondary_peak_value = spectral_flatness[i];
+      secondary_peak_weight = spectral_flatness[i];
+      secondary_peak_position = bin_mid;
+    }
+  }
+
+  // Merge the peaks if they are close.
+  if ((fabs(secondary_peak_position - *peak_position) < 2 * bin_size) &&
+      (secondary_peak_weight > 0.5f * (*peak_weight))) {
+    *peak_weight += secondary_peak_weight;
+    *peak_position = 0.5f * (*peak_position + secondary_peak_position);
+  }
+}
+
+void UpdateLrt(rtc::ArrayView<const int, kHistogramSize> lrt_histogram,
+               float* prior_model_lrt,
+               bool* low_lrt_fluctuations) {
+  RTC_DCHECK(prior_model_lrt);
+  RTC_DCHECK(low_lrt_fluctuations);
+
+  float average = 0.f;
+  float average_compl = 0.f;
+  float average_squared = 0.f;
+  int count = 0;
+
+  for (int i = 0; i < 10; ++i) {
+    float bin_mid = (i + 0.5f) * kBinSizeLrt;
+    average += lrt_histogram[i] * bin_mid;
+    count += lrt_histogram[i];
+  }
+  if (count > 0) {
+    average = average / count;
+  }
+
+  for (int i = 0; i < kHistogramSize; ++i) {
+    float bin_mid = (i + 0.5f) * kBinSizeLrt;
+    average_squared += lrt_histogram[i] * bin_mid * bin_mid;
+    average_compl += lrt_histogram[i] * bin_mid;
+  }
+  constexpr float kOneFeatureUpdateWindowSize = 1.f / kFeatureUpdateWindowSize;
+  average_squared = average_squared * kOneFeatureUpdateWindowSize;
+  average_compl = average_compl * kOneFeatureUpdateWindowSize;
+
+  // Fluctuation limit of the LRT feature.
+  *low_lrt_fluctuations = average_squared - average * average_compl < 0.05f;
+
+  // Get the threshold for the LRT feature.
+  constexpr float kMaxLrt = 1.f;
+  constexpr float kMinLrt = .2f;
+  if (*low_lrt_fluctuations) {
+    // Very low fluctuation, so likely noise.
+    *prior_model_lrt = kMaxLrt;
+  } else {
+    *prior_model_lrt = std::min(kMaxLrt, std::max(kMinLrt, 1.2f * average));
+  }
+}
+
+}  // namespace
+
+PriorSignalModelEstimator::PriorSignalModelEstimator(float lrt_initial_value)
+    : prior_model_(lrt_initial_value) {}
+
+// Extracts thresholds for the feature parameters and computes the
+// thresholds/weights.
+void PriorSignalModelEstimator::Update(const Histograms& histograms) {
+  bool low_lrt_fluctuations;
+  UpdateLrt(histograms.get_lrt(), &prior_model_.lrt, &low_lrt_fluctuations);
+
+  // For spectral flatness and spectral difference: compute the main peaks of
+  // the histograms.
+  float spectral_flatness_peak_position;
+  int spectral_flatness_peak_weight;
+  FindFirstOfTwoLargestPeaks(
+      kBinSizeSpecFlat, histograms.get_spectral_flatness(),
+      &spectral_flatness_peak_position, &spectral_flatness_peak_weight);
+
+  float spectral_diff_peak_position = 0.f;
+  int spectral_diff_peak_weight = 0;
+  FindFirstOfTwoLargestPeaks(kBinSizeSpecDiff, histograms.get_spectral_diff(),
+                             &spectral_diff_peak_position,
+                             &spectral_diff_peak_weight);
+
+  // Reject if the weight of the peaks is not large enough or if the peak
+  // value is too small (the peak limit for spectral flatness varies between
+  // 0 and 1).
+  const int use_spec_flat = spectral_flatness_peak_weight < 0.3f * 500 ||
+                                    spectral_flatness_peak_position < 0.6f
+                                ? 0
+                                : 1;
+
+  // Reject if the weight of the peaks is not large enough or if the
+  // fluctuations of the LRT feature are very low, indicating a noise state.
+  const int use_spec_diff =
+      spectral_diff_peak_weight < 0.3f * 500 || low_lrt_fluctuations ? 0 : 1;
+
+  // Update the model.
+  prior_model_.template_diff_threshold = 1.2f * spectral_diff_peak_position;
+  prior_model_.template_diff_threshold =
+      std::min(1.f, std::max(0.16f, prior_model_.template_diff_threshold));
+
+  float one_by_feature_sum = 1.f / (1.f + use_spec_flat + use_spec_diff);
+  prior_model_.lrt_weighting = one_by_feature_sum;
+
+  if (use_spec_flat == 1) {
+    prior_model_.flatness_threshold = 0.9f * spectral_flatness_peak_position;
+    prior_model_.flatness_threshold =
+        std::min(.95f, std::max(0.1f, prior_model_.flatness_threshold));
+    prior_model_.flatness_weighting = one_by_feature_sum;
+  } else {
+    prior_model_.flatness_weighting = 0.f;
+  }
+
+  if (use_spec_diff == 1) {
+    prior_model_.difference_weighting = one_by_feature_sum;
+  } else {
+    prior_model_.difference_weighting = 0.f;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.h
new file mode 100644
index 0000000000..d178323dba
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/prior_signal_model_estimator.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_
+
+#include "modules/audio_processing/ns/histograms.h"
+#include "modules/audio_processing/ns/prior_signal_model.h"
+
+namespace webrtc {
+
+// Estimator of the prior signal model parameters.
+class PriorSignalModelEstimator {
+ public:
+  explicit PriorSignalModelEstimator(float lrt_initial_value);
+  PriorSignalModelEstimator(const PriorSignalModelEstimator&) = delete;
+  PriorSignalModelEstimator& operator=(const PriorSignalModelEstimator&) =
+      delete;
+
+  // Updates the model estimate.
+  void Update(const Histograms& h);
+
+  // Returns the estimated model.
+  const PriorSignalModel& get_prior_model() const { return prior_model_; }
+
+ private:
+  PriorSignalModel prior_model_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc
new file mode 100644
index 0000000000..bab494ff21
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/quantile_noise_estimator.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/ns/fast_math.h"
+
+namespace webrtc {
+
+QuantileNoiseEstimator::QuantileNoiseEstimator() {
+  quantile_.fill(0.f);
+  density_.fill(0.3f);
+  log_quantile_.fill(8.f);
+
+  constexpr float kOneBySimult = 1.f / kSimult;
+  for (size_t i = 0; i < kSimult; ++i) {
+    counter_[i] = floor(kLongStartupPhaseBlocks * (i + 1.f) * kOneBySimult);
+  }
+}
+
+void QuantileNoiseEstimator::Estimate(
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+    rtc::ArrayView<float, kFftSizeBy2Plus1> noise_spectrum) {
+  std::array<float, kFftSizeBy2Plus1> log_spectrum;
+  LogApproximation(signal_spectrum, log_spectrum);
+
+  int quantile_index_to_return = -1;
+  // Loop over the simultaneous estimates.
+  for (int s = 0, k = 0; s < kSimult;
+       ++s, k += static_cast<int>(kFftSizeBy2Plus1)) {
+    const float one_by_counter_plus_1 = 1.f / (counter_[s] + 1.f);
+    for (int i = 0, j = k; i < static_cast<int>(kFftSizeBy2Plus1); ++i, ++j) {
+      // Update the log quantile estimate.
+      const float delta = density_[j] > 1.f ? 40.f / density_[j] : 40.f;
+
+      const float multiplier = delta * one_by_counter_plus_1;
+      if (log_spectrum[i] > log_quantile_[j]) {
+        log_quantile_[j] += 0.25f * multiplier;
+      } else {
+        log_quantile_[j] -= 0.75f * multiplier;
+      }
+
+      // Update the density estimate.
+      constexpr float kWidth = 0.01f;
+      constexpr float kOneByWidthPlus2 = 1.f / (2.f * kWidth);
+      if (fabs(log_spectrum[i] - log_quantile_[j]) < kWidth) {
+        density_[j] = (counter_[s] * density_[j] + kOneByWidthPlus2) *
+                      one_by_counter_plus_1;
+      }
+    }
+
+    if (counter_[s] >= kLongStartupPhaseBlocks) {
+      counter_[s] = 0;
+      if (num_updates_ >= kLongStartupPhaseBlocks) {
+        quantile_index_to_return = k;
+      }
+    }
+
+    ++counter_[s];
+  }
+
+  // Sequentially update the noise during startup.
+  if (num_updates_ < kLongStartupPhaseBlocks) {
+    // Use the last `s` to get a noise estimate during startup that differs
+    // from zero.
+    quantile_index_to_return = kFftSizeBy2Plus1 * (kSimult - 1);
+    ++num_updates_;
+  }
+
+  if (quantile_index_to_return >= 0) {
+    ExpApproximation(
+        rtc::ArrayView<const float>(&log_quantile_[quantile_index_to_return],
+                                    kFftSizeBy2Plus1),
+        quantile_);
+  }
+
+  std::copy(quantile_.begin(), quantile_.end(), noise_spectrum.begin());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.h
new file mode 100644
index 0000000000..67d1512209
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/quantile_noise_estimator.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_
+
+#include <math.h>
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/ns/ns_common.h"
+
+namespace webrtc {
+
+constexpr int kSimult = 3;
+
+// For quantile noise estimation.
+class QuantileNoiseEstimator {
+ public:
+  QuantileNoiseEstimator();
+  QuantileNoiseEstimator(const QuantileNoiseEstimator&) = delete;
+  QuantileNoiseEstimator& operator=(const QuantileNoiseEstimator&) = delete;
+
+  // Estimate noise.
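+  // Writes a per-bin noise estimate into `noise_spectrum`, tracking an
+  // approximate quantile of the log signal spectrum with kSimult staggered
+  // estimators, so that a refreshed estimate becomes available roughly every
+  // kLongStartupPhaseBlocks / kSimult blocks once startup has completed.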
+  void Estimate(rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+                rtc::ArrayView<float, kFftSizeBy2Plus1> noise_spectrum);
+
+ private:
+  std::array<float, kSimult * kFftSizeBy2Plus1> density_;
+  std::array<float, kSimult * kFftSizeBy2Plus1> log_quantile_;
+  std::array<float, kFftSizeBy2Plus1> quantile_;
+  std::array<int, kSimult> counter_;
+  int num_updates_ = 1;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc b/third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc
new file mode 100644
index 0000000000..364bfd00d8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/signal_model.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/signal_model.h"
+
+namespace webrtc {
+
+SignalModel::SignalModel() {
+  constexpr float kSfFeatureThr = 0.5f;
+
+  lrt = kLtrFeatureThr;
+  spectral_flatness = kSfFeatureThr;
+  spectral_diff = kSfFeatureThr;
+  avg_log_lrt.fill(kLtrFeatureThr);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/signal_model.h b/third_party/libwebrtc/modules/audio_processing/ns/signal_model.h
new file mode 100644
index 0000000000..6614d38a38
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/signal_model.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_
+#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_
+
+#include <array>
+
+#include "modules/audio_processing/ns/ns_common.h"
+
+namespace webrtc {
+
+struct SignalModel {
+  SignalModel();
+  SignalModel(const SignalModel&) = delete;
+  SignalModel& operator=(const SignalModel&) = delete;
+
+  float lrt;
+  float spectral_diff;
+  float spectral_flatness;
+  // Log LRT factor with time-smoothing.
+  std::array<float, kFftSizeBy2Plus1> avg_log_lrt;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc
new file mode 100644
index 0000000000..67dd3bb687
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.cc
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/signal_model_estimator.h"
+
+#include "modules/audio_processing/ns/fast_math.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1;
+
+// Computes the difference measure between the input spectrum and a
+// template/learned noise spectrum.
+float ComputeSpectralDiff(
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+    float signal_spectral_sum,
+    float diff_normalization) {
+  // spectral_diff = var(signal_spectrum) -
+  //     cov(signal_spectrum, magnAvgPause)^2 / var(magnAvgPause)
+
+  // Compute average quantities.
+  float noise_average = 0.f;
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    // Conservative smooth noise spectrum from pause frames.
+    noise_average += conservative_noise_spectrum[i];
+  }
+  noise_average = noise_average * kOneByFftSizeBy2Plus1;
+  float signal_average = signal_spectral_sum * kOneByFftSizeBy2Plus1;
+
+  // Compute variance and covariance quantities.
+  float covariance = 0.f;
+  float noise_variance = 0.f;
+  float signal_variance = 0.f;
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    float signal_diff = signal_spectrum[i] - signal_average;
+    float noise_diff = conservative_noise_spectrum[i] - noise_average;
+    covariance += signal_diff * noise_diff;
+    noise_variance += noise_diff * noise_diff;
+    signal_variance += signal_diff * signal_diff;
+  }
+  covariance *= kOneByFftSizeBy2Plus1;
+  noise_variance *= kOneByFftSizeBy2Plus1;
+  signal_variance *= kOneByFftSizeBy2Plus1;
+
+  // Update of the average magnitude spectrum.
+  float spectral_diff =
+      signal_variance - (covariance * covariance) / (noise_variance + 0.0001f);
+  // Normalize.
+  return spectral_diff / (diff_normalization + 0.0001f);
+}
+
+// Updates the spectral flatness based on the input spectrum.
+void UpdateSpectralFlatness(
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+    float signal_spectral_sum,
+    float* spectral_flatness) {
+  RTC_DCHECK(spectral_flatness);
+
+  // Compute the log of the ratio of the geometric to arithmetic mean (handle
+  // the log(0) case separately).
+  constexpr float kAveraging = 0.3f;
+  float avg_spect_flatness_num = 0.f;
+  for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) {
+    if (signal_spectrum[i] == 0.f) {
+      *spectral_flatness -= kAveraging * (*spectral_flatness);
+      return;
+    }
+  }
+
+  for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) {
+    avg_spect_flatness_num += LogApproximation(signal_spectrum[i]);
+  }
+
+  float avg_spect_flatness_denom = signal_spectral_sum - signal_spectrum[0];
+
+  avg_spect_flatness_denom = avg_spect_flatness_denom * kOneByFftSizeBy2Plus1;
+  avg_spect_flatness_num = avg_spect_flatness_num * kOneByFftSizeBy2Plus1;
+
+  float spectral_tmp =
+      ExpApproximation(avg_spect_flatness_num) / avg_spect_flatness_denom;
+
+  // Time-avg update of the spectral flatness feature.
+  *spectral_flatness += kAveraging * (spectral_tmp - *spectral_flatness);
+}
+
+// Updates the log LRT measures.
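+// For each bin the smoothed log likelihood ratio is updated as
+//   avg_log_lrt[i] += 0.5f * (log_lr_i - avg_log_lrt[i]),
+// where log_lr_i is approximated from the prior and posterior SNR of bin i;
+// the scalar feature `lrt` is then the mean of avg_log_lrt over all bins.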
+void UpdateSpectralLrt(rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
+                       rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
+                       rtc::ArrayView<float, kFftSizeBy2Plus1> avg_log_lrt,
+                       float* lrt) {
+  RTC_DCHECK(lrt);
+
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    float tmp1 = 1.f + 2.f * prior_snr[i];
+    float tmp2 = 2.f * prior_snr[i] / (tmp1 + 0.0001f);
+    float bessel_tmp = (post_snr[i] + 1.f) * tmp2;
+    avg_log_lrt[i] +=
+        .5f * (bessel_tmp - LogApproximation(tmp1) - avg_log_lrt[i]);
+  }
+
+  float log_lrt_time_avg_k_sum = 0.f;
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    log_lrt_time_avg_k_sum += avg_log_lrt[i];
+  }
+  *lrt = log_lrt_time_avg_k_sum * kOneByFftSizeBy2Plus1;
+}
+
+}  // namespace
+
+SignalModelEstimator::SignalModelEstimator()
+    : prior_model_estimator_(kLtrFeatureThr) {}
+
+void SignalModelEstimator::AdjustNormalization(int32_t num_analyzed_frames,
+                                               float signal_energy) {
+  diff_normalization_ *= num_analyzed_frames;
+  diff_normalization_ += signal_energy;
+  diff_normalization_ /= (num_analyzed_frames + 1);
+}
+
+// Update the noise features.
+void SignalModelEstimator::Update(
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+    float signal_spectral_sum,
+    float signal_energy) {
+  // Compute the spectral flatness on the input spectrum.
+  UpdateSpectralFlatness(signal_spectrum, signal_spectral_sum,
+                         &features_.spectral_flatness);
+
+  // Compute the difference of the input spectrum with the learned/estimated
+  // noise spectrum.
+  float spectral_diff =
+      ComputeSpectralDiff(conservative_noise_spectrum, signal_spectrum,
+                          signal_spectral_sum, diff_normalization_);
+  // Compute the time-avg update of the difference feature.
+  features_.spectral_diff += 0.3f * (spectral_diff - features_.spectral_diff);
+
+  signal_energy_sum_ += signal_energy;
+
+  // Compute histograms for parameter decisions (thresholds and weights for
+  // features). Parameters are extracted periodically.
+  if (--histogram_analysis_counter_ > 0) {
+    histograms_.Update(features_);
+  } else {
+    // Compute the model parameters.
+    prior_model_estimator_.Update(histograms_);
+
+    // Clear the histograms for the next update.
+    histograms_.Clear();
+
+    histogram_analysis_counter_ = kFeatureUpdateWindowSize;
+
+    // Update every window:
+    // Compute the normalization for the spectral difference for the next
+    // estimation.
+    signal_energy_sum_ = signal_energy_sum_ / kFeatureUpdateWindowSize;
+    diff_normalization_ = 0.5f * (signal_energy_sum_ + diff_normalization_);
+    signal_energy_sum_ = 0.f;
+  }
+
+  // Compute the LRT.
+  UpdateSpectralLrt(prior_snr, post_snr, features_.avg_log_lrt, &features_.lrt);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.h
new file mode 100644
index 0000000000..58ce00acbf
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/signal_model_estimator.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_
+
+#include <stdint.h>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/ns/histograms.h"
+#include "modules/audio_processing/ns/ns_common.h"
+#include "modules/audio_processing/ns/prior_signal_model.h"
+#include "modules/audio_processing/ns/prior_signal_model_estimator.h"
+#include "modules/audio_processing/ns/signal_model.h"
+
+namespace webrtc {
+
+class SignalModelEstimator {
+ public:
+  SignalModelEstimator();
+  SignalModelEstimator(const SignalModelEstimator&) = delete;
+  SignalModelEstimator& operator=(const SignalModelEstimator&) = delete;
+
+  // Compute signal normalization during the initial startup phase.
+  void AdjustNormalization(int32_t num_analyzed_frames, float signal_energy);
+
+  void Update(
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+      float signal_spectral_sum,
+      float signal_energy);
+
+  const PriorSignalModel& get_prior_model() const {
+    return prior_model_estimator_.get_prior_model();
+  }
+  const SignalModel& get_model() { return features_; }
+
+ private:
+  float diff_normalization_ = 0.f;
+  float signal_energy_sum_ = 0.f;
+  Histograms histograms_;
+  int histogram_analysis_counter_ = 500;
+  PriorSignalModelEstimator prior_model_estimator_;
+  SignalModel features_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc b/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc
new file mode 100644
index 0000000000..fce9bc8e07
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/speech_probability_estimator.h"
+
+#include <math.h>
+#include <algorithm>
+
+#include "modules/audio_processing/ns/fast_math.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+SpeechProbabilityEstimator::SpeechProbabilityEstimator() {
+  speech_probability_.fill(0.f);
+}
+
+void SpeechProbabilityEstimator::Update(
+    int32_t num_analyzed_frames,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+    float signal_spectral_sum,
+    float signal_energy) {
+  // Update the models.
+  if (num_analyzed_frames < kLongStartupPhaseBlocks) {
+    signal_model_estimator_.AdjustNormalization(num_analyzed_frames,
+                                                signal_energy);
+  }
+  signal_model_estimator_.Update(prior_snr, post_snr,
+                                 conservative_noise_spectrum, signal_spectrum,
+                                 signal_spectral_sum, signal_energy);
+
+  const SignalModel& model = signal_model_estimator_.get_model();
+  const PriorSignalModel& prior_model =
+      signal_model_estimator_.get_prior_model();
+
+  // Width parameter in the sigmoid map for the prior model.
+  constexpr float kWidthPrior0 = 4.f;
+  // Width for pause region: lower range, so increase width in tanh map.
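+  // The indicator functions below map each feature through
+  // 0.5f * (tanh(width * (feature - threshold)) + 1.f), i.e. a sigmoid
+  // centered at the corresponding model threshold; doubling the width
+  // sharpens the transition when a feature points towards a pause region.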
+  constexpr float kWidthPrior1 = 2.f * kWidthPrior0;
+
+  // Average LRT feature: use a larger width in the tanh map for pause regions.
+  float width_prior = model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0;
+
+  // Compute the indicator function: sigmoid map.
+  float indicator0 =
+      0.5f * (tanh(width_prior * (model.lrt - prior_model.lrt)) + 1.f);
+
+  // Spectral flatness feature: use a larger width in the tanh map for pause
+  // regions.
+  width_prior = model.spectral_flatness > prior_model.flatness_threshold
+                    ? kWidthPrior1
+                    : kWidthPrior0;
+
+  // Compute the indicator function: sigmoid map.
+  float indicator1 =
+      0.5f * (tanh(1.f * width_prior *
+                   (prior_model.flatness_threshold - model.spectral_flatness)) +
+              1.f);
+
+  // For the template spectrum-difference: use a larger width in the tanh map
+  // for pause regions.
+  width_prior = model.spectral_diff < prior_model.template_diff_threshold
+                    ? kWidthPrior1
+                    : kWidthPrior0;
+
+  // Compute the indicator function: sigmoid map.
+  float indicator2 =
+      0.5f * (tanh(width_prior * (model.spectral_diff -
+                                  prior_model.template_diff_threshold)) +
+              1.f);
+
+  // Combine the indicator functions with the feature weights.
+  float ind_prior = prior_model.lrt_weighting * indicator0 +
+                    prior_model.flatness_weighting * indicator1 +
+                    prior_model.difference_weighting * indicator2;
+
+  // Compute the prior probability.
+  prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_);
+
+  // Make sure the probabilities are within range: keep the floor at 0.01.
+  prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f);
+
+  // Final speech probability: combine the prior model with the LR factor.
+  float gain_prior =
+      (1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f);
+
+  std::array<float, kFftSizeBy2Plus1> inv_lrt;
+  ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt);
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    speech_probability_[i] = 1.f / (1.f + gain_prior * inv_lrt[i]);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.h b/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.h
new file mode 100644
index 0000000000..259c3b6776
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/speech_probability_estimator.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_
+
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/ns/ns_common.h"
+#include "modules/audio_processing/ns/signal_model_estimator.h"
+
+namespace webrtc {
+
+// Class for estimating the probability of speech.
+class SpeechProbabilityEstimator {
+ public:
+  SpeechProbabilityEstimator();
+  SpeechProbabilityEstimator(const SpeechProbabilityEstimator&) = delete;
+  SpeechProbabilityEstimator& operator=(const SpeechProbabilityEstimator&) =
+      delete;
+
+  // Compute speech probability.
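+  // Updates the per-bin speech probabilities from the SNR estimates and the
+  // spectra of the current frame; the result is available via
+  // get_probability(), and the frequency-independent prior via
+  // get_prior_probability().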
+  void Update(
+      int32_t num_analyzed_frames,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
+      float signal_spectral_sum,
+      float signal_energy);
+
+  float get_prior_probability() const { return prior_speech_prob_; }
+  rtc::ArrayView<const float> get_probability() { return speech_probability_; }
+
+ private:
+  SignalModelEstimator signal_model_estimator_;
+  float prior_speech_prob_ = .5f;
+  std::array<float, kFftSizeBy2Plus1> speech_probability_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc b/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc
new file mode 100644
index 0000000000..7bf18346f9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/suppression_params.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+SuppressionParams::SuppressionParams(
+    NsConfig::SuppressionLevel suppression_level) {
+  switch (suppression_level) {
+    case NsConfig::SuppressionLevel::k6dB:
+      over_subtraction_factor = 1.f;
+      // 6 dB attenuation.
+      minimum_attenuating_gain = 0.5f;
+      use_attenuation_adjustment = false;
+      break;
+    case NsConfig::SuppressionLevel::k12dB:
+      over_subtraction_factor = 1.f;
+      // 12 dB attenuation.
+      minimum_attenuating_gain = 0.25f;
+      use_attenuation_adjustment = true;
+      break;
+    case NsConfig::SuppressionLevel::k18dB:
+      over_subtraction_factor = 1.1f;
+      // 18 dB attenuation.
+      minimum_attenuating_gain = 0.125f;
+      use_attenuation_adjustment = true;
+      break;
+    case NsConfig::SuppressionLevel::k21dB:
+      over_subtraction_factor = 1.25f;
+      // 20.9 dB attenuation.
+      minimum_attenuating_gain = 0.09f;
+      use_attenuation_adjustment = true;
+      break;
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.h b/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.h
new file mode 100644
index 0000000000..ad11977d81
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/suppression_params.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_
+#define MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_
+
+#include "modules/audio_processing/ns/ns_config.h"
+
+namespace webrtc {
+
+struct SuppressionParams {
+  explicit SuppressionParams(NsConfig::SuppressionLevel suppression_level);
+  SuppressionParams(const SuppressionParams&) = delete;
+  SuppressionParams& operator=(const SuppressionParams&) = delete;
+
+  float over_subtraction_factor;
+  float minimum_attenuating_gain;
+  bool use_attenuation_adjustment;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc b/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc
new file mode 100644
index 0000000000..e14b7970d9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/ns/wiener_filter.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+#include "modules/audio_processing/ns/fast_math.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+WienerFilter::WienerFilter(const SuppressionParams& suppression_params)
+    : suppression_params_(suppression_params) {
+  filter_.fill(1.f);
+  initial_spectral_estimate_.fill(0.f);
+  spectrum_prev_process_.fill(0.f);
+}
+
+void WienerFilter::Update(
+    int32_t num_analyzed_frames,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> noise_spectrum,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_noise_spectrum,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> parametric_noise_spectrum,
+    rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum) {
+  for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+    // Previous estimate, based on the previous frame with the gain filter.
+    float prev_tsa = spectrum_prev_process_[i] /
+                     (prev_noise_spectrum[i] + 0.0001f) * filter_[i];
+
+    // Current estimate.
+    float current_tsa;
+    if (signal_spectrum[i] > noise_spectrum[i]) {
+      current_tsa = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f;
+    } else {
+      current_tsa = 0.f;
+    }
+
+    // Directed decision estimate: a weighted sum of the previous and the
+    // current estimate.
+    float snr_prior = 0.98f * prev_tsa + (1.f - 0.98f) * current_tsa;
+    filter_[i] =
+        snr_prior / (suppression_params_.over_subtraction_factor + snr_prior);
+    filter_[i] = std::max(std::min(filter_[i], 1.f),
+                          suppression_params_.minimum_attenuating_gain);
+  }
+
+  if (num_analyzed_frames < kShortStartupPhaseBlocks) {
+    for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
+      initial_spectral_estimate_[i] += signal_spectrum[i];
+      float filter_initial = initial_spectral_estimate_[i] -
+                             suppression_params_.over_subtraction_factor *
+                                 parametric_noise_spectrum[i];
+      filter_initial /= initial_spectral_estimate_[i] + 0.0001f;
+
+      filter_initial = std::max(std::min(filter_initial, 1.f),
+                                suppression_params_.minimum_attenuating_gain);
+
+      // Weight the two suppression filters.
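+      // During the first kShortStartupPhaseBlocks frames this cross-fades
+      // linearly between the parametric startup filter and the
+      // decision-directed filter, in proportion to how far startup has
+      // progressed.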
+      constexpr float kOneByShortStartupPhaseBlocks =
+          1.f / kShortStartupPhaseBlocks;
+      filter_initial *= kShortStartupPhaseBlocks - num_analyzed_frames;
+      filter_[i] *= num_analyzed_frames;
+      filter_[i] += filter_initial;
+      filter_[i] *= kOneByShortStartupPhaseBlocks;
+    }
+  }
+
+  std::copy(signal_spectrum.begin(), signal_spectrum.end(),
+            spectrum_prev_process_.begin());
+}
+
+float WienerFilter::ComputeOverallScalingFactor(
+    int32_t num_analyzed_frames,
+    float prior_speech_probability,
+    float energy_before_filtering,
+    float energy_after_filtering) const {
+  if (!suppression_params_.use_attenuation_adjustment ||
+      num_analyzed_frames <= kLongStartupPhaseBlocks) {
+    return 1.f;
+  }
+
+  float gain = SqrtFastApproximation(energy_after_filtering /
+                                     (energy_before_filtering + 1.f));
+
+  // Threshold in the final energy gain factor calculation.
+  constexpr float kBLim = 0.5f;
+  float scale_factor1 = 1.f;
+  if (gain > kBLim) {
+    scale_factor1 = 1.f + 1.3f * (gain - kBLim);
+    if (gain * scale_factor1 > 1.f) {
+      scale_factor1 = 1.f / gain;
+    }
+  }
+
+  float scale_factor2 = 1.f;
+  if (gain < kBLim) {
+    // Do not reduce the scale too much for pause regions: attenuation here
+    // should be controlled by flooring.
+    gain = std::max(gain, suppression_params_.minimum_attenuating_gain);
+    scale_factor2 = 1.f - 0.3f * (kBLim - gain);
+  }
+
+  // Combine both scales with the speech/noise probability: note that the
+  // prior (prior_speech_probability) is not frequency dependent.
+  return prior_speech_probability * scale_factor1 +
+         (1.f - prior_speech_probability) * scale_factor2;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.h b/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.h
new file mode 100644
index 0000000000..b55c5dc59d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/ns/wiener_filter.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
+
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/ns/ns_common.h"
+#include "modules/audio_processing/ns/suppression_params.h"
+
+namespace webrtc {
+
+// Estimates a Wiener-filter based frequency domain noise reduction filter.
+class WienerFilter {
+ public:
+  explicit WienerFilter(const SuppressionParams& suppression_params);
+  WienerFilter(const WienerFilter&) = delete;
+  WienerFilter& operator=(const WienerFilter&) = delete;
+
+  // Updates the filter estimate.
+  void Update(
+      int32_t num_analyzed_frames,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> noise_spectrum,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> prev_noise_spectrum,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> parametric_noise_spectrum,
+      rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum);
+
+  // Compute an overall gain scaling factor.
+  float ComputeOverallScalingFactor(int32_t num_analyzed_frames,
+                                    float prior_speech_probability,
+                                    float energy_before_filtering,
+                                    float energy_after_filtering) const;
+
+  // Returns the filter.
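+  // The returned view stays valid for the lifetime of the WienerFilter and
+  // holds per-bin gains in [minimum_attenuating_gain, 1].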
+  rtc::ArrayView<const float, kFftSizeBy2Plus1> get_filter() const {
+    return filter_;
+  }
+
+ private:
+  const SuppressionParams& suppression_params_;
+  std::array<float, kFftSizeBy2Plus1> spectrum_prev_process_;
+  std::array<float, kFftSizeBy2Plus1> initial_spectral_estimate_;
+  std::array<float, kFftSizeBy2Plus1> filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc
new file mode 100644
index 0000000000..cea5c837dc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/optionally_built_submodule_creators.h"
+
+#include <memory>
+
+#include "modules/audio_processing/transient/transient_suppressor_impl.h"
+
+namespace webrtc {
+
+std::unique_ptr<TransientSuppressor> CreateTransientSuppressor(
+    const ApmSubmoduleCreationOverrides& overrides,
+    TransientSuppressor::VadMode vad_mode,
+    int sample_rate_hz,
+    int detection_rate_hz,
+    int num_channels) {
+#ifdef WEBRTC_EXCLUDE_TRANSIENT_SUPPRESSOR
+  return nullptr;
+#else
+  if (overrides.transient_suppression) {
+    return nullptr;
+  }
+  return std::make_unique<TransientSuppressorImpl>(
+      vad_mode, sample_rate_hz, detection_rate_hz, num_channels);
+#endif
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.h b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.h
new file mode 100644
index 0000000000..1be2743986
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_OPTIONALLY_BUILT_SUBMODULE_CREATORS_H_
+#define MODULES_AUDIO_PROCESSING_OPTIONALLY_BUILT_SUBMODULE_CREATORS_H_
+
+#include <memory>
+
+#include "modules/audio_processing/transient/transient_suppressor.h"
+
+namespace webrtc {
+
+// These overrides are only to be used for testing purposes.
+// Each flag emulates a preprocessor macro to exclude a submodule of APM from
+// the build, e.g. WEBRTC_EXCLUDE_TRANSIENT_SUPPRESSOR. If the corresponding
+// flag `transient_suppression` is enabled, then the creators will return
+// nullptr instead of a submodule instance, as if the macro had been defined.
+struct ApmSubmoduleCreationOverrides {
+  bool transient_suppression = false;
+};
+
+// Creates a transient suppressor.
+// Will instead return nullptr if one of the following is true:
+// * WEBRTC_EXCLUDE_TRANSIENT_SUPPRESSOR is defined
+// * The corresponding override in `overrides` is enabled.
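+//
+// Usage sketch (hypothetical rates and channel count):
+//   ApmSubmoduleCreationOverrides overrides;
+//   overrides.transient_suppression = true;  // Emulate the exclusion macro.
+//   auto ts = CreateTransientSuppressor(
+//       overrides, TransientSuppressor::VadMode::kDefault, 48000, 48000, 1);
+//   // `ts` is nullptr here, exactly as if the macro had been defined.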
+std::unique_ptr<TransientSuppressor> CreateTransientSuppressor(
+    const ApmSubmoduleCreationOverrides& overrides,
+    TransientSuppressor::VadMode vad_mode,
+    int sample_rate_hz,
+    int detection_rate_hz,
+    int num_channels);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_OPTIONALLY_BUILT_SUBMODULE_CREATORS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators_gn/moz.build
new file mode 100644
index 0000000000..3e4fa4ca51
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators_gn/moz.build
@@ -0,0 +1,232 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/optionally_built_submodule_creators.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "rt"
+    ]
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_X11"] = "1"
+    DEFINES["WEBRTC_BSD"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True
+    DEFINES["NOMINMAX"] = True
+    DEFINES["NTDDI_VERSION"] = "0x0A000000"
+    DEFINES["PSAPI_VERSION"] = "2"
+    DEFINES["UNICODE"] = True
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["WEBRTC_WIN"] = True
+    DEFINES["WIN32"] = True
+    DEFINES["WIN32_LEAN_AND_MEAN"] = True
+    DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP"
+    DEFINES["WINVER"] = "0x0A00"
+    DEFINES["_ATL_NO_OPENGL"] = True
+    DEFINES["_CRT_RAND_S"] = True
+    DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True
+    DEFINES["_HAS_EXCEPTIONS"] = "0"
+    DEFINES["_HAS_NODISCARD"] = True
+    DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True
+    DEFINES["_SECURE_ATL"] = True
+    DEFINES["_UNICODE"] = True
+    DEFINES["_WIN32_WINNT"] = "0x0A00"
+    DEFINES["_WINDOWS"] = True
+    DEFINES["__STD_C"] = True
+
+    OS_LIBS += [
+        "crypt32",
+        "iphlpapi",
+        "secur32",
+        "winmm"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64":
+
+    DEFINES["WEBRTC_ARCH_ARM64"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "arm":
+
+    CXXFLAGS += [
+        "-mfpu=neon"
+    ]
+
+    DEFINES["WEBRTC_ARCH_ARM"] = True
+    DEFINES["WEBRTC_ARCH_ARM_V7"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "mips32":
+
+    DEFINES["MIPS32_LE"] = True
+    DEFINES["MIPS_FPU_LE"] = True
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "mips64":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0"
+
+if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_X11"] = "1"
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android":
+
+    OS_LIBS += [
+        "android_support",
+        "unwind"
+    ]
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android":
+
+    CXXFLAGS += [
+        "-msse2"
+    ]
+
+    OS_LIBS += [
+        "android_support"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux":
+
+    CXXFLAGS += [
+        "-msse2"
+    ]
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+Library("optionally_built_submodule_creators_gn")
diff --git a/third_party/libwebrtc/modules/audio_processing/render_queue_item_verifier.h b/third_party/libwebrtc/modules/audio_processing/render_queue_item_verifier.h
new file mode 100644
index 0000000000..b8aff4a107
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/render_queue_item_verifier.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
+#define MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
+
+#include <vector>
+
+namespace webrtc {
+
+// Functor to use when supplying a verifier function for the queue item
+// verification.
+template <typename T>
+class RenderQueueItemVerifier {
+ public:
+  explicit RenderQueueItemVerifier(size_t minimum_capacity)
+      : minimum_capacity_(minimum_capacity) {}
+
+  bool operator()(const std::vector<T>& v) const {
+    return v.capacity() >= minimum_capacity_;
+  }
+
+ private:
+  size_t minimum_capacity_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.cc b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.cc
new file mode 100644
index 0000000000..2a564fc233
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.cc
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/residual_echo_detector.h"
+
+#include <algorithm>
+#include <numeric>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace {
+
+float Power(rtc::ArrayView<const float> input) {
+  if (input.empty()) {
+    return 0.f;
+  }
+  return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) /
+         input.size();
+}
+
+constexpr size_t kLookbackFrames = 650;
+// TODO(ivoc): Verify the size of this buffer.
+constexpr size_t kRenderBufferSize = 30;
+constexpr float kAlpha = 0.001f;
+// 10 seconds of data, updated every 10 ms.
+constexpr size_t kAggregationBufferSize = 10 * 100;
+
+}  // namespace
+
+namespace webrtc {
+
+std::atomic<int> ResidualEchoDetector::instance_count_(0);
+
+ResidualEchoDetector::ResidualEchoDetector()
+    : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
+      render_buffer_(kRenderBufferSize),
+      render_power_(kLookbackFrames),
+      render_power_mean_(kLookbackFrames),
+      render_power_std_dev_(kLookbackFrames),
+      covariances_(kLookbackFrames),
+      recent_likelihood_max_(kAggregationBufferSize) {}
+
+ResidualEchoDetector::~ResidualEchoDetector() = default;
+
+void ResidualEchoDetector::AnalyzeRenderAudio(
+    rtc::ArrayView<const float> render_audio) {
+  // Dump debug data assuming 48 kHz sample rate (if this assumption is not
+  // valid the dumped audio will need to be converted offline accordingly).
+ data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(), + 48000, 1); + + if (render_buffer_.Size() == 0) { + frames_since_zero_buffer_size_ = 0; + } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) { + // This can happen in a few cases: at the start of a call, due to a glitch + // or due to clock drift. The excess capture value will be ignored. + // TODO(ivoc): Include how often this happens in APM stats. + render_buffer_.Pop(); + frames_since_zero_buffer_size_ = 0; + } + ++frames_since_zero_buffer_size_; + float power = Power(render_audio); + render_buffer_.Push(power); +} + +void ResidualEchoDetector::AnalyzeCaptureAudio( + rtc::ArrayView capture_audio) { + // Dump debug data assuming 48 kHz sample rate (if this assumption is not + // valid the dumped audio will need to be converted offline accordingly). + data_dumper_->DumpWav("ed_capture", capture_audio.size(), + capture_audio.data(), 48000, 1); + + if (first_process_call_) { + // On the first process call (so the start of a call), we must flush the + // render buffer, otherwise the render data will be delayed. + render_buffer_.Clear(); + first_process_call_ = false; + } + + // Get the next render value. + const absl::optional buffered_render_power = render_buffer_.Pop(); + if (!buffered_render_power) { + // This can happen in a few cases: at the start of a call, due to a glitch + // or due to clock drift. The excess capture value will be ignored. + // TODO(ivoc): Include how often this happens in APM stats. + return; + } + // Update the render statistics, and store the statistics in circular buffers. + render_statistics_.Update(*buffered_render_power); + RTC_DCHECK_LT(next_insertion_index_, kLookbackFrames); + render_power_[next_insertion_index_] = *buffered_render_power; + render_power_mean_[next_insertion_index_] = render_statistics_.mean(); + render_power_std_dev_[next_insertion_index_] = + render_statistics_.std_deviation(); + + // Get the next capture value, update capture statistics and add the relevant + // values to the buffers. + const float capture_power = Power(capture_audio); + capture_statistics_.Update(capture_power); + const float capture_mean = capture_statistics_.mean(); + const float capture_std_deviation = capture_statistics_.std_deviation(); + + // Update the covariance values and determine the new echo likelihood. + echo_likelihood_ = 0.f; + size_t read_index = next_insertion_index_; + + int best_delay = -1; + for (size_t delay = 0; delay < covariances_.size(); ++delay) { + RTC_DCHECK_LT(read_index, render_power_.size()); + covariances_[delay].Update(capture_power, capture_mean, + capture_std_deviation, render_power_[read_index], + render_power_mean_[read_index], + render_power_std_dev_[read_index]); + read_index = read_index > 0 ? read_index - 1 : kLookbackFrames - 1; + + if (covariances_[delay].normalized_cross_correlation() > echo_likelihood_) { + echo_likelihood_ = covariances_[delay].normalized_cross_correlation(); + best_delay = static_cast(delay); + } + } + // This is a temporary log message to help find the underlying cause for echo + // likelihoods > 1.0. + // TODO(ivoc): Remove once the issue is resolved. + if (echo_likelihood_ > 1.1f) { + // Make sure we don't spam the log. 
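+    // For context, the likelihood above is the largest normalized
+    // cross-correlation over all candidate delays d (a sketch of the
+    // statistic, maintained incrementally by the estimators above):
+    //   likelihood = max_d cov_d(capture_power, render_power) /
+    //                      (std(capture_power) * std(render_power)),
+    // so values above 1.0 indicate an inconsistency in the estimators.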
+ if (log_counter_ < 5 && best_delay != -1) { + size_t read_index = kLookbackFrames + next_insertion_index_ - best_delay; + if (read_index >= kLookbackFrames) { + read_index -= kLookbackFrames; + } + RTC_DCHECK_LT(read_index, render_power_.size()); + RTC_LOG_F(LS_ERROR) << "Echo detector internal state: {" + "Echo likelihood: " + << echo_likelihood_ << ", Best Delay: " << best_delay + << ", Covariance: " + << covariances_[best_delay].covariance() + << ", Last capture power: " << capture_power + << ", Capture mean: " << capture_mean + << ", Capture_standard deviation: " + << capture_std_deviation << ", Last render power: " + << render_power_[read_index] + << ", Render mean: " << render_power_mean_[read_index] + << ", Render standard deviation: " + << render_power_std_dev_[read_index] + << ", Reliability: " << reliability_ << "}"; + log_counter_++; + } + } + RTC_DCHECK_LT(echo_likelihood_, 1.1f); + + reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f; + echo_likelihood_ *= reliability_; + // This is a temporary fix to prevent echo likelihood values > 1.0. + // TODO(ivoc): Find the root cause of this issue and fix it. + echo_likelihood_ = std::min(echo_likelihood_, 1.0f); + int echo_percentage = static_cast(echo_likelihood_ * 100); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood", + echo_percentage, 0, 100, 100 /* number of bins */); + + // Update the buffer of recent likelihood values. + recent_likelihood_max_.Update(echo_likelihood_); + + // Update the next insertion index. + next_insertion_index_ = next_insertion_index_ < (kLookbackFrames - 1) + ? next_insertion_index_ + 1 + : 0; +} + +void ResidualEchoDetector::Initialize(int /*capture_sample_rate_hz*/, + int /*num_capture_channels*/, + int /*render_sample_rate_hz*/, + int /*num_render_channels*/) { + render_buffer_.Clear(); + std::fill(render_power_.begin(), render_power_.end(), 0.f); + std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f); + std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f); + render_statistics_.Clear(); + capture_statistics_.Clear(); + recent_likelihood_max_.Clear(); + for (auto& cov : covariances_) { + cov.Clear(); + } + echo_likelihood_ = 0.f; + next_insertion_index_ = 0; + reliability_ = 0.f; +} + +EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const { + EchoDetector::Metrics metrics; + metrics.echo_likelihood = echo_likelihood_; + metrics.echo_likelihood_recent_max = recent_likelihood_max_.max(); + return metrics; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.h b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.h new file mode 100644 index 0000000000..ac554b17c4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_RESIDUAL_ECHO_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_RESIDUAL_ECHO_DETECTOR_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/echo_detector/circular_buffer.h" +#include "modules/audio_processing/echo_detector/mean_variance_estimator.h" +#include "modules/audio_processing/echo_detector/moving_max.h" +#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h" +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +class ResidualEchoDetector : public EchoDetector { + public: + ResidualEchoDetector(); + ~ResidualEchoDetector() override; + + // This function should be called while holding the render lock. + void AnalyzeRenderAudio(rtc::ArrayView render_audio) override; + + // This function should be called while holding the capture lock. + void AnalyzeCaptureAudio(rtc::ArrayView capture_audio) override; + + // This function should be called while holding the capture lock. + void Initialize(int capture_sample_rate_hz, + int num_capture_channels, + int render_sample_rate_hz, + int num_render_channels) override; + + // This function is for testing purposes only. + void SetReliabilityForTest(float value) { reliability_ = value; } + + // This function should be called while holding the capture lock. + EchoDetector::Metrics GetMetrics() const override; + + private: + static std::atomic instance_count_; + std::unique_ptr data_dumper_; + // Keep track if the `Process` function has been previously called. + bool first_process_call_ = true; + // Buffer for storing the power of incoming farend buffers. This is needed for + // cases where calls to BufferFarend and Process are jittery. + CircularBuffer render_buffer_; + // Count how long ago it was that the size of `render_buffer_` was zero. This + // value is also reset to zero when clock drift is detected and a value from + // the renderbuffer is discarded, even though the buffer is not actually zero + // at that point. This is done to avoid repeatedly removing elements in this + // situation. + size_t frames_since_zero_buffer_size_ = 0; + + // Circular buffers containing delayed versions of the power, mean and + // standard deviation, for calculating the delayed covariance values. + std::vector render_power_; + std::vector render_power_mean_; + std::vector render_power_std_dev_; + // Covariance estimates for different delay values. + std::vector covariances_; + // Index where next element should be inserted in all of the above circular + // buffers. + size_t next_insertion_index_ = 0; + + MeanVarianceEstimator render_statistics_; + MeanVarianceEstimator capture_statistics_; + // Current echo likelihood. + float echo_likelihood_ = 0.f; + // Reliability of the current likelihood. + float reliability_ = 0.f; + MovingMax recent_likelihood_max_; + + int log_counter_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_RESIDUAL_ECHO_DETECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/residual_echo_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector_unittest.cc new file mode 100644 index 0000000000..d8c227a443 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/residual_echo_detector_unittest.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/residual_echo_detector.h" + +#include + +#include "api/make_ref_counted.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(ResidualEchoDetectorTests, Echo) { + auto echo_detector = rtc::make_ref_counted(); + echo_detector->SetReliabilityForTest(1.0f); + std::vector ones(160, 1.f); + std::vector zeros(160, 0.f); + + // In this test the capture signal has a delay of 10 frames w.r.t. the render + // signal, but is otherwise identical. Both signals are periodic with a 20 + // frame interval. + for (int i = 0; i < 1000; i++) { + if (i % 20 == 0) { + echo_detector->AnalyzeRenderAudio(ones); + echo_detector->AnalyzeCaptureAudio(zeros); + } else if (i % 20 == 10) { + echo_detector->AnalyzeRenderAudio(zeros); + echo_detector->AnalyzeCaptureAudio(ones); + } else { + echo_detector->AnalyzeRenderAudio(zeros); + echo_detector->AnalyzeCaptureAudio(zeros); + } + } + // We expect to detect echo with near certain likelihood. + auto ed_metrics = echo_detector->GetMetrics(); + ASSERT_TRUE(ed_metrics.echo_likelihood); + EXPECT_NEAR(1.f, ed_metrics.echo_likelihood.value(), 0.01f); +} + +TEST(ResidualEchoDetectorTests, NoEcho) { + auto echo_detector = rtc::make_ref_counted(); + echo_detector->SetReliabilityForTest(1.0f); + std::vector ones(160, 1.f); + std::vector zeros(160, 0.f); + + // In this test the capture signal is always zero, so no echo should be + // detected. + for (int i = 0; i < 1000; i++) { + if (i % 20 == 0) { + echo_detector->AnalyzeRenderAudio(ones); + } else { + echo_detector->AnalyzeRenderAudio(zeros); + } + echo_detector->AnalyzeCaptureAudio(zeros); + } + // We expect to not detect any echo. + auto ed_metrics = echo_detector->GetMetrics(); + ASSERT_TRUE(ed_metrics.echo_likelihood); + EXPECT_NEAR(0.f, ed_metrics.echo_likelihood.value(), 0.01f); +} + +TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) { + auto echo_detector = rtc::make_ref_counted(); + echo_detector->SetReliabilityForTest(1.0f); + std::vector ones(160, 1.f); + std::vector zeros(160, 0.f); + + // In this test the capture signal has a delay of 10 frames w.r.t. the render + // signal, but is otherwise identical. Both signals are periodic with a 20 + // frame interval. There is a simulated clock drift of 1% in this test, with + // the render side producing data slightly faster. + for (int i = 0; i < 1000; i++) { + if (i % 20 == 0) { + echo_detector->AnalyzeRenderAudio(ones); + echo_detector->AnalyzeCaptureAudio(zeros); + } else if (i % 20 == 10) { + echo_detector->AnalyzeRenderAudio(zeros); + echo_detector->AnalyzeCaptureAudio(ones); + } else { + echo_detector->AnalyzeRenderAudio(zeros); + echo_detector->AnalyzeCaptureAudio(zeros); + } + if (i % 100 == 0) { + // This is causing the simulated clock drift. + echo_detector->AnalyzeRenderAudio(zeros); + } + } + // We expect to detect echo with high likelihood. Clock drift is harder to + // correct on the render side than on the capture side. This is due to the + // render buffer, clock drift can only be discovered after a certain delay. + // A growing buffer can be caused by jitter or clock drift and it's not + // possible to make this decision right away. 
For this reason we only expect
+  // an echo likelihood of 75% in this test.
+  auto ed_metrics = echo_detector->GetMetrics();
+  ASSERT_TRUE(ed_metrics.echo_likelihood);
+  EXPECT_GT(ed_metrics.echo_likelihood.value(), 0.75f);
+}
+
+TEST(ResidualEchoDetectorTests, EchoWithCaptureClockDrift) {
+  auto echo_detector = rtc::make_ref_counted<ResidualEchoDetector>();
+  echo_detector->SetReliabilityForTest(1.0f);
+  std::vector<float> ones(160, 1.f);
+  std::vector<float> zeros(160, 0.f);
+
+  // In this test the capture signal has a delay of 10 frames w.r.t. the render
+  // signal, but is otherwise identical. Both signals are periodic with a 20
+  // frame interval. There is a simulated clock drift of 1% in this test, with
+  // the capture side producing data slightly faster.
+  for (int i = 0; i < 1000; i++) {
+    if (i % 20 == 0) {
+      echo_detector->AnalyzeRenderAudio(ones);
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    } else if (i % 20 == 10) {
+      echo_detector->AnalyzeRenderAudio(zeros);
+      echo_detector->AnalyzeCaptureAudio(ones);
+    } else {
+      echo_detector->AnalyzeRenderAudio(zeros);
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    }
+    if (i % 100 == 0) {
+      // This is causing the simulated clock drift.
+      echo_detector->AnalyzeCaptureAudio(zeros);
+    }
+  }
+  // We expect to detect echo with near certain likelihood.
+  auto ed_metrics = echo_detector->GetMetrics();
+  ASSERT_TRUE(ed_metrics.echo_likelihood);
+  EXPECT_NEAR(1.f, ed_metrics.echo_likelihood.value(), 0.01f);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/rms_level.cc b/third_party/libwebrtc/modules/audio_processing/rms_level.cc
new file mode 100644
index 0000000000..b0a45cb403
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/rms_level.cc
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/rms_level.h"
+
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+static constexpr float kMaxSquaredLevel = 32768 * 32768;
+// kMinLevel is the level corresponding to kMinLevelDb, that is 10^(-127/10).
+static constexpr float kMinLevel = 1.995262314968883e-13f;
+
+// Calculates the normalized RMS value from a mean square value. The input
+// should be the sum of squared samples divided by the number of samples. The
+// value will be normalized to full range before computing the RMS, which is
+// returned as a negated dBFS. That is, 0 is full amplitude while 127 is very
+// faint.
+int ComputeRms(float mean_square) {
+  if (mean_square <= kMinLevel * kMaxSquaredLevel) {
+    // Very faint; simply return the minimum value.
+    return RmsLevel::kMinLevelDb;
+  }
+  // Normalize by the max level.
+  const float mean_square_norm = mean_square / kMaxSquaredLevel;
+  RTC_DCHECK_GT(mean_square_norm, kMinLevel);
+  // 20log_10(x^0.5) = 10log_10(x)
+  const float rms = 10.f * std::log10(mean_square_norm);
+  RTC_DCHECK_LE(rms, 0.f);
+  RTC_DCHECK_GT(rms, -RmsLevel::kMinLevelDb);
+  // Return the negated value.
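+  // Worked example (a sketch): a full-scale sinusoid has mean_square =
+  // 0.5 * kMaxSquaredLevel, so mean_square_norm = 0.5 and
+  // rms = 10 * log10(0.5) ~= -3.01 dB; negating and rounding yields 3,
+  // matching the -3 dBFS expectations in rms_level_unittest.cc.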
+  return static_cast<int>(-rms + 0.5f);
+}
+}  // namespace
+
+RmsLevel::RmsLevel() {
+  Reset();
+}
+
+RmsLevel::~RmsLevel() = default;
+
+void RmsLevel::Reset() {
+  sum_square_ = 0.f;
+  sample_count_ = 0;
+  max_sum_square_ = 0.f;
+  block_size_ = absl::nullopt;
+}
+
+void RmsLevel::Analyze(rtc::ArrayView<const int16_t> data) {
+  if (data.empty()) {
+    return;
+  }
+
+  CheckBlockSize(data.size());
+
+  const float sum_square =
+      std::accumulate(data.begin(), data.end(), 0.f,
+                      [](float a, int16_t b) { return a + b * b; });
+  RTC_DCHECK_GE(sum_square, 0.f);
+  sum_square_ += sum_square;
+  sample_count_ += data.size();
+
+  max_sum_square_ = std::max(max_sum_square_, sum_square);
+}
+
+void RmsLevel::Analyze(rtc::ArrayView<const float> data) {
+  if (data.empty()) {
+    return;
+  }
+
+  CheckBlockSize(data.size());
+
+  float sum_square = 0.f;
+
+  for (float data_k : data) {
+    int16_t tmp =
+        static_cast<int16_t>(std::min(std::max(data_k, -32768.f), 32767.f));
+    sum_square += tmp * tmp;
+  }
+  RTC_DCHECK_GE(sum_square, 0.f);
+  sum_square_ += sum_square;
+  sample_count_ += data.size();
+
+  max_sum_square_ = std::max(max_sum_square_, sum_square);
+}
+
+void RmsLevel::AnalyzeMuted(size_t length) {
+  CheckBlockSize(length);
+  sample_count_ += length;
+}
+
+int RmsLevel::Average() {
+  const bool have_samples = (sample_count_ != 0);
+  int rms = have_samples ? ComputeRms(sum_square_ / sample_count_)
+                         : RmsLevel::kMinLevelDb;
+
+  // To ensure that kMinLevelDb represents digital silence (muted audio
+  // sources) we'll check here if the sum_square is actually 0. If it's not
+  // we'll bump up the return value to `kInaudibleButNotMuted`.
+  // https://datatracker.ietf.org/doc/html/rfc6464
+  if (have_samples && rms == RmsLevel::kMinLevelDb && sum_square_ != 0.0f) {
+    rms = kInaudibleButNotMuted;
+  }
+
+  Reset();
+  return rms;
+}
+
+RmsLevel::Levels RmsLevel::AverageAndPeak() {
+  // Note that block_size_ should by design always be non-empty when
+  // sample_count_ != 0. Also, the * operator of absl::optional enforces this
+  // with a DCHECK.
+  Levels levels = (sample_count_ == 0)
+                      ? Levels{RmsLevel::kMinLevelDb, RmsLevel::kMinLevelDb}
+                      : Levels{ComputeRms(sum_square_ / sample_count_),
+                               ComputeRms(max_sum_square_ / *block_size_)};
+  Reset();
+  return levels;
+}
+
+void RmsLevel::CheckBlockSize(size_t block_size) {
+  if (block_size_ != block_size) {
+    Reset();
+    block_size_ = block_size;
+  }
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/rms_level.h b/third_party/libwebrtc/modules/audio_processing/rms_level.h
new file mode 100644
index 0000000000..fbece19ecd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/rms_level.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
+#define MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Computes the root mean square (RMS) level in dBFS (decibels from digital
+// full-scale) of audio data. The computation follows RFC 6465:
+// https://tools.ietf.org/html/rfc6465
+// with the intent that it can provide the RTP audio level indication.
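+// In terms of the RFC, the reported value is (a sketch of the mapping)
+//   level = -round(10 * log10(mean_square / max_square)),
+// clamped to [0, 127], so 0 means full scale and 127 means silence; see
+// ComputeRms() in rms_level.cc for the exact rounding.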
+// +// The expected approach is to provide constant-sized chunks of audio to +// Analyze(). When enough chunks have been accumulated to form a packet, call +// Average() to get the audio level indicator for the RTP header. +class RmsLevel { + public: + struct Levels { + int average; + int peak; + }; + + enum : int { kMinLevelDb = 127, kInaudibleButNotMuted = 126 }; + + RmsLevel(); + ~RmsLevel(); + + // Can be called to reset internal states, but is not required during normal + // operation. + void Reset(); + + // Pass each chunk of audio to Analyze() to accumulate the level. + void Analyze(rtc::ArrayView data); + void Analyze(rtc::ArrayView data); + + // If all samples with the given `length` have a magnitude of zero, this is + // a shortcut to avoid some computation. + void AnalyzeMuted(size_t length); + + // Computes the RMS level over all data passed to Analyze() since the last + // call to Average(). The returned value is positive but should be interpreted + // as negative as per the RFC. It is constrained to [0, 127]. Resets the + // internal state to start a new measurement period. + int Average(); + + // Like Average() above, but also returns the RMS peak value. Resets the + // internal state to start a new measurement period. + Levels AverageAndPeak(); + + private: + // Compares `block_size` with `block_size_`. If they are different, calls + // Reset() and stores the new size. + void CheckBlockSize(size_t block_size); + + float sum_square_; + size_t sample_count_; + float max_sum_square_; + absl::optional block_size_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/rms_level_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/rms_level_gn/moz.build new file mode 100644 index 0000000000..ec35734b6d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/rms_level_gn/moz.build @@ -0,0 +1,221 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/rms_level.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("rms_level_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/rms_level_unittest.cc b/third_party/libwebrtc/modules/audio_processing/rms_level_unittest.cc new file mode 100644 index 0000000000..4cbad461e7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/rms_level_unittest.cc @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +// MSVC++ requires this to be set before any other includes to get M_PI. 
+#define _USE_MATH_DEFINES +#include "modules/audio_processing/rms_level.h" + +#include +#include +#include + +#include "api/array_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { +constexpr int kSampleRateHz = 48000; +constexpr size_t kBlockSizeSamples = kSampleRateHz / 100; + +std::unique_ptr RunTest(rtc::ArrayView input) { + std::unique_ptr level(new RmsLevel); + for (size_t n = 0; n + kBlockSizeSamples <= input.size(); + n += kBlockSizeSamples) { + level->Analyze(input.subview(n, kBlockSizeSamples)); + } + return level; +} + +std::unique_ptr RunTest(rtc::ArrayView input) { + std::unique_ptr level(new RmsLevel); + for (size_t n = 0; n + kBlockSizeSamples <= input.size(); + n += kBlockSizeSamples) { + level->Analyze(input.subview(n, kBlockSizeSamples)); + } + return level; +} + +std::vector CreateInt16Sinusoid(int frequency_hz, + int amplitude, + size_t num_samples) { + std::vector x(num_samples); + for (size_t n = 0; n < num_samples; ++n) { + x[n] = rtc::saturated_cast( + amplitude * std::sin(2 * M_PI * n * frequency_hz / kSampleRateHz)); + } + return x; +} + +std::vector CreateFloatSinusoid(int frequency_hz, + int amplitude, + size_t num_samples) { + std::vector x16 = + CreateInt16Sinusoid(frequency_hz, amplitude, num_samples); + std::vector x(x16.size()); + for (size_t n = 0; n < x.size(); ++n) { + x[n] = x16[n]; + } + return x; +} + +} // namespace + +TEST(RmsLevelTest, VerifyIndentityBetweenFloatAndFix) { + auto x_f = CreateFloatSinusoid(1000, INT16_MAX, kSampleRateHz); + auto x_i = CreateFloatSinusoid(1000, INT16_MAX, kSampleRateHz); + auto level_f = RunTest(x_f); + auto level_i = RunTest(x_i); + int avg_i = level_i->Average(); + int avg_f = level_f->Average(); + EXPECT_EQ(3, avg_i); // -3 dBFS + EXPECT_EQ(avg_f, avg_i); +} + +TEST(RmsLevelTest, Run1000HzFullScale) { + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); + auto level = RunTest(x); + EXPECT_EQ(3, level->Average()); // -3 dBFS +} + +TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) { + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); + auto level = RunTest(x); + auto stats = level->AverageAndPeak(); + EXPECT_EQ(3, stats.average); // -3 dBFS + EXPECT_EQ(3, stats.peak); +} + +TEST(RmsLevelTest, Run1000HzHalfScale) { + auto x = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz); + auto level = RunTest(x); + EXPECT_EQ(9, level->Average()); // -9 dBFS +} + +TEST(RmsLevelTest, RunZeros) { + std::vector x(kSampleRateHz, 0); // 1 second of pure silence. + auto level = RunTest(x); + EXPECT_EQ(127, level->Average()); +} + +TEST(RmsLevelTest, RunZerosAverageAndPeak) { + std::vector x(kSampleRateHz, 0); // 1 second of pure silence. + auto level = RunTest(x); + auto stats = level->AverageAndPeak(); + EXPECT_EQ(127, stats.average); + EXPECT_EQ(127, stats.peak); +} + +TEST(RmsLevelTest, NoSamples) { + RmsLevel level; + EXPECT_EQ(127, level.Average()); // Return minimum if no samples are given. +} + +TEST(RmsLevelTest, NoSamplesAverageAndPeak) { + RmsLevel level; + auto stats = level.AverageAndPeak(); + EXPECT_EQ(127, stats.average); + EXPECT_EQ(127, stats.peak); +} + +TEST(RmsLevelTest, PollTwice) { + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); + auto level = RunTest(x); + level->Average(); + EXPECT_EQ(127, level->Average()); // Stats should be reset at this point. 
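+  // Illustrative follow-up (a sketch, not part of the upstream test): after
+  // the implicit reset, a new measurement period starts, so a half-scale
+  // tone reads -9 dBFS again, mirroring Run1000HzHalfScale above.
+  auto y = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz);
+  for (size_t n = 0; n + kBlockSizeSamples <= y.size();
+       n += kBlockSizeSamples) {
+    level->Analyze(
+        rtc::ArrayView<const int16_t>(y.data() + n, kBlockSizeSamples));
+  }
+  EXPECT_EQ(9, level->Average());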
+} + +TEST(RmsLevelTest, Reset) { + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); + auto level = RunTest(x); + level->Reset(); + EXPECT_EQ(127, level->Average()); // Stats should be reset at this point. +} + +// Inserts 1 second of full-scale sinusoid, followed by 1 second of muted. +TEST(RmsLevelTest, ProcessMuted) { + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); + auto level = RunTest(x); + const size_t kBlocksPerSecond = rtc::CheckedDivExact( + static_cast(kSampleRateHz), kBlockSizeSamples); + for (size_t i = 0; i < kBlocksPerSecond; ++i) { + level->AnalyzeMuted(kBlockSizeSamples); + } + EXPECT_EQ(6, level->Average()); // Average RMS halved due to the silence. +} + +// Digital silence must yield 127 and anything else should yield 126 or lower. +TEST(RmsLevelTest, OnlyDigitalSilenceIs127) { + std::vector test_buffer(kSampleRateHz, 0); + auto level = RunTest(test_buffer); + EXPECT_EQ(127, level->Average()); + // Change one sample to something other than 0 to make the buffer not strictly + // represent digital silence. + test_buffer[0] = 1; + level = RunTest(test_buffer); + EXPECT_LT(level->Average(), 127); +} + +// Inserts 1 second of half-scale sinusoid, follwed by 10 ms of full-scale, and +// finally 1 second of half-scale again. Expect the average to be -9 dBFS due +// to the vast majority of the signal being half-scale, and the peak to be +// -3 dBFS. +TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) { + auto half_scale = CreateInt16Sinusoid(1000, INT16_MAX / 2, kSampleRateHz); + auto full_scale = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz / 100); + auto x = half_scale; + x.insert(x.end(), full_scale.begin(), full_scale.end()); + x.insert(x.end(), half_scale.begin(), half_scale.end()); + ASSERT_EQ(static_cast(2 * kSampleRateHz + kSampleRateHz / 100), + x.size()); + auto level = RunTest(x); + auto stats = level->AverageAndPeak(); + EXPECT_EQ(9, stats.average); + EXPECT_EQ(3, stats.peak); +} + +TEST(RmsLevelTest, ResetOnBlockSizeChange) { + auto x = CreateInt16Sinusoid(1000, INT16_MAX, kSampleRateHz); + auto level = RunTest(x); + // Create a new signal with half amplitude, but double block length. + auto y = CreateInt16Sinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2); + level->Analyze(y); + auto stats = level->AverageAndPeak(); + // Expect all stats to only be influenced by the last signal (y), since the + // changed block size should reset the stats. + EXPECT_EQ(9, stats.average); + EXPECT_EQ(9, stats.peak); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/splitting_filter.cc b/third_party/libwebrtc/modules/audio_processing/splitting_filter.cc new file mode 100644 index 0000000000..d47090bc03 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/splitting_filter.cc @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/splitting_filter.h" + +#include + +#include "api/array_view.h" +#include "common_audio/channel_buffer.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr size_t kSamplesPerBand = 160; +constexpr size_t kTwoBandFilterSamplesPerFrame = 320; + +} // namespace + +SplittingFilter::SplittingFilter(size_t num_channels, + size_t num_bands, + size_t num_frames) + : num_bands_(num_bands), + two_bands_states_(num_bands_ == 2 ? num_channels : 0), + three_band_filter_banks_(num_bands_ == 3 ? num_channels : 0) { + RTC_CHECK(num_bands_ == 2 || num_bands_ == 3); +} + +SplittingFilter::~SplittingFilter() = default; + +void SplittingFilter::Analysis(const ChannelBuffer* data, + ChannelBuffer* bands) { + RTC_DCHECK_EQ(num_bands_, bands->num_bands()); + RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), + bands->num_frames_per_band() * bands->num_bands()); + if (bands->num_bands() == 2) { + TwoBandsAnalysis(data, bands); + } else if (bands->num_bands() == 3) { + ThreeBandsAnalysis(data, bands); + } +} + +void SplittingFilter::Synthesis(const ChannelBuffer* bands, + ChannelBuffer* data) { + RTC_DCHECK_EQ(num_bands_, bands->num_bands()); + RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), + bands->num_frames_per_band() * bands->num_bands()); + if (bands->num_bands() == 2) { + TwoBandsSynthesis(bands, data); + } else if (bands->num_bands() == 3) { + ThreeBandsSynthesis(bands, data); + } +} + +void SplittingFilter::TwoBandsAnalysis(const ChannelBuffer* data, + ChannelBuffer* bands) { + RTC_DCHECK_EQ(two_bands_states_.size(), data->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), kTwoBandFilterSamplesPerFrame); + + for (size_t i = 0; i < two_bands_states_.size(); ++i) { + std::array, 2> bands16; + std::array full_band16; + FloatS16ToS16(data->channels(0)[i], full_band16.size(), full_band16.data()); + WebRtcSpl_AnalysisQMF(full_band16.data(), data->num_frames(), + bands16[0].data(), bands16[1].data(), + two_bands_states_[i].analysis_state1, + two_bands_states_[i].analysis_state2); + S16ToFloatS16(bands16[0].data(), bands16[0].size(), bands->channels(0)[i]); + S16ToFloatS16(bands16[1].data(), bands16[1].size(), bands->channels(1)[i]); + } +} + +void SplittingFilter::TwoBandsSynthesis(const ChannelBuffer* bands, + ChannelBuffer* data) { + RTC_DCHECK_LE(data->num_channels(), two_bands_states_.size()); + RTC_DCHECK_EQ(data->num_frames(), kTwoBandFilterSamplesPerFrame); + for (size_t i = 0; i < data->num_channels(); ++i) { + std::array, 2> bands16; + std::array full_band16; + FloatS16ToS16(bands->channels(0)[i], bands16[0].size(), bands16[0].data()); + FloatS16ToS16(bands->channels(1)[i], bands16[1].size(), bands16[1].data()); + WebRtcSpl_SynthesisQMF(bands16[0].data(), bands16[1].data(), + bands->num_frames_per_band(), full_band16.data(), + two_bands_states_[i].synthesis_state1, + two_bands_states_[i].synthesis_state2); + S16ToFloatS16(full_band16.data(), full_band16.size(), data->channels(0)[i]); + } +} + +void SplittingFilter::ThreeBandsAnalysis(const ChannelBuffer* data, + ChannelBuffer* bands) { + RTC_DCHECK_EQ(three_band_filter_banks_.size(), data->num_channels()); + RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); + RTC_DCHECK_LE(data->num_channels(), bands->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize); + 
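+  // Size arithmetic (assuming the constants in three_band_filter_bank.h):
+  // kFullBandSize = 480 samples (10 ms at 48 kHz) splits into kNumBands = 3
+  // bands of kSplitBandSize = 480 / 3 = 160 samples each.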
RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize); + RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands); + RTC_DCHECK_EQ(bands->num_frames_per_band(), + ThreeBandFilterBank::kSplitBandSize); + + for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { + three_band_filter_banks_[i].Analysis( + rtc::ArrayView( + data->channels_view()[i].data(), + ThreeBandFilterBank::kFullBandSize), + rtc::ArrayView, + ThreeBandFilterBank::kNumBands>( + bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands)); + } +} + +void SplittingFilter::ThreeBandsSynthesis(const ChannelBuffer* bands, + ChannelBuffer* data) { + RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); + RTC_DCHECK_LE(data->num_channels(), bands->num_channels()); + RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); + RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize); + RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize); + RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands); + RTC_DCHECK_EQ(bands->num_frames_per_band(), + ThreeBandFilterBank::kSplitBandSize); + + for (size_t i = 0; i < data->num_channels(); ++i) { + three_band_filter_banks_[i].Synthesis( + rtc::ArrayView, + ThreeBandFilterBank::kNumBands>( + bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands), + rtc::ArrayView( + data->channels_view()[i].data(), + ThreeBandFilterBank::kFullBandSize)); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/splitting_filter.h b/third_party/libwebrtc/modules/audio_processing/splitting_filter.h new file mode 100644 index 0000000000..e578dd07c1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/splitting_filter.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ + +#include +#include +#include + +#include "common_audio/channel_buffer.h" +#include "modules/audio_processing/three_band_filter_bank.h" + +namespace webrtc { + +struct TwoBandsStates { + TwoBandsStates() { + memset(analysis_state1, 0, sizeof(analysis_state1)); + memset(analysis_state2, 0, sizeof(analysis_state2)); + memset(synthesis_state1, 0, sizeof(synthesis_state1)); + memset(synthesis_state2, 0, sizeof(synthesis_state2)); + } + + static const int kStateSize = 6; + int analysis_state1[kStateSize]; + int analysis_state2[kStateSize]; + int synthesis_state1[kStateSize]; + int synthesis_state2[kStateSize]; +}; + +// Splitting filter which is able to split into and merge from 2 or 3 frequency +// bands. The number of channels needs to be provided at construction time. +// +// For each block, Analysis() is called to split into bands and then Synthesis() +// to merge these bands again. The input and output signals are contained in +// ChannelBuffers and for the different bands an array of ChannelBuffers is +// used. 
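+//
+// A typical call sequence (a sketch; sizes assume 48 kHz, 10 ms frames and
+// mono audio):
+//
+//   SplittingFilter filter(/*num_channels=*/1, /*num_bands=*/3,
+//                          /*num_frames=*/480);
+//   ChannelBuffer<float> data(/*num_frames=*/480, /*num_channels=*/1);
+//   ChannelBuffer<float> bands(/*num_frames=*/480, /*num_channels=*/1,
+//                              /*num_bands=*/3);
+//   filter.Analysis(&data, &bands);   // split: 3 bands x 160 samples
+//   filter.Synthesis(&bands, &data);  // merge back to 480 samples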
+class SplittingFilter { + public: + SplittingFilter(size_t num_channels, size_t num_bands, size_t num_frames); + ~SplittingFilter(); + + void Analysis(const ChannelBuffer* data, ChannelBuffer* bands); + void Synthesis(const ChannelBuffer* bands, ChannelBuffer* data); + + private: + // Two-band analysis and synthesis work for 640 samples or less. + void TwoBandsAnalysis(const ChannelBuffer* data, + ChannelBuffer* bands); + void TwoBandsSynthesis(const ChannelBuffer* bands, + ChannelBuffer* data); + void ThreeBandsAnalysis(const ChannelBuffer* data, + ChannelBuffer* bands); + void ThreeBandsSynthesis(const ChannelBuffer* bands, + ChannelBuffer* data); + void InitBuffers(); + + const size_t num_bands_; + std::vector two_bands_states_; + std::vector three_band_filter_banks_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/splitting_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/splitting_filter_unittest.cc new file mode 100644 index 0000000000..30fe4caf9c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/splitting_filter_unittest.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// MSVC++ requires this to be set before any other includes to get M_PI. +#define _USE_MATH_DEFINES + +#include "modules/audio_processing/splitting_filter.h" + +#include + +#include "common_audio/channel_buffer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +const size_t kSamplesPer16kHzChannel = 160; +const size_t kSamplesPer48kHzChannel = 480; + +} // namespace + +// Generates a signal from presence or absence of sine waves of different +// frequencies. +// Splits into 3 bands and checks their presence or absence. +// Recombines the bands. +// Calculates the delay. +// Checks that the cross correlation of input and output is high enough at the +// calculated delay. +TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { + static const int kChannels = 1; + static const int kSampleRateHz = 48000; + static const size_t kNumBands = 3; + static const int kFrequenciesHz[kNumBands] = {1000, 12000, 18000}; + static const float kAmplitude = 8192.f; + static const size_t kChunks = 8; + SplittingFilter splitting_filter(kChannels, kNumBands, + kSamplesPer48kHzChannel); + ChannelBuffer in_data(kSamplesPer48kHzChannel, kChannels, kNumBands); + ChannelBuffer bands(kSamplesPer48kHzChannel, kChannels, kNumBands); + ChannelBuffer out_data(kSamplesPer48kHzChannel, kChannels, kNumBands); + for (size_t i = 0; i < kChunks; ++i) { + // Input signal generation. + bool is_present[kNumBands]; + memset(in_data.channels()[0], 0, + kSamplesPer48kHzChannel * sizeof(in_data.channels()[0][0])); + for (size_t j = 0; j < kNumBands; ++j) { + is_present[j] = i & (static_cast(1) << j); + float amplitude = is_present[j] ? kAmplitude : 0.f; + for (size_t k = 0; k < kSamplesPer48kHzChannel; ++k) { + in_data.channels()[0][k] += + amplitude * sin(2.f * M_PI * kFrequenciesHz[j] * + (i * kSamplesPer48kHzChannel + k) / kSampleRateHz); + } + } + // Three band splitting filter. 
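+    // (A sketch of what follows: Analysis() fills `bands` with kNumBands
+    // bands of kSamplesPer16kHzChannel samples each; the energy check below
+    // then verifies that each tone appears only in its own band.)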
+ splitting_filter.Analysis(&in_data, &bands); + // Energy calculation. + float energy[kNumBands]; + for (size_t j = 0; j < kNumBands; ++j) { + energy[j] = 0.f; + for (size_t k = 0; k < kSamplesPer16kHzChannel; ++k) { + energy[j] += bands.channels(j)[0][k] * bands.channels(j)[0][k]; + } + energy[j] /= kSamplesPer16kHzChannel; + if (is_present[j]) { + EXPECT_GT(energy[j], kAmplitude * kAmplitude / 4); + } else { + EXPECT_LT(energy[j], kAmplitude * kAmplitude / 4); + } + } + // Three band merge. + splitting_filter.Synthesis(&bands, &out_data); + // Delay and cross correlation estimation. + float xcorr = 0.f; + for (size_t delay = 0; delay < kSamplesPer48kHzChannel; ++delay) { + float tmpcorr = 0.f; + for (size_t j = delay; j < kSamplesPer48kHzChannel; ++j) { + tmpcorr += in_data.channels()[0][j - delay] * out_data.channels()[0][j]; + } + tmpcorr /= kSamplesPer48kHzChannel; + if (tmpcorr > xcorr) { + xcorr = tmpcorr; + } + } + // High cross correlation check. + bool any_present = false; + for (size_t j = 0; j < kNumBands; ++j) { + any_present |= is_present[j]; + } + if (any_present) { + EXPECT_GT(xcorr, kAmplitude * kAmplitude / 4); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.cc new file mode 100644 index 0000000000..416e287751 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.cc @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/aec_dump_based_simulator.h" + +#include +#include + +#include "modules/audio_processing/echo_control_mobile_impl.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/aec_dump_based_simulator.h" +#include "modules/audio_processing/test/protobuf_utils.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { +namespace test { +namespace { + +// Verify output bitexactness for the fixed interface. +// TODO(peah): Check whether it would make sense to add a threshold +// to use for checking the bitexactness in a soft manner. +bool VerifyFixedBitExactness(const webrtc::audioproc::Stream& msg, + const Int16Frame& frame) { + if (sizeof(frame.data[0]) * frame.data.size() != msg.output_data().size()) { + return false; + } else { + const int16_t* frame_data = frame.data.data(); + for (int k = 0; k < frame.num_channels * frame.samples_per_channel; ++k) { + if (msg.output_data().data()[k] != frame_data[k]) { + return false; + } + } + } + return true; +} + +// Verify output bitexactness for the float interface. 
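+// Comparison is exact, sample for sample, mirroring the fixed-interface
+// check above (see the TODO there about possibly allowing a soft threshold).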
+bool VerifyFloatBitExactness(const webrtc::audioproc::Stream& msg, + const StreamConfig& out_config, + const ChannelBuffer& out_buf) { + if (static_cast(msg.output_channel_size()) != + out_config.num_channels() || + msg.output_channel(0).size() != out_config.num_frames()) { + return false; + } else { + for (int ch = 0; ch < msg.output_channel_size(); ++ch) { + for (size_t sample = 0; sample < out_config.num_frames(); ++sample) { + if (msg.output_channel(ch).data()[sample] != + out_buf.channels()[ch][sample]) { + return false; + } + } + } + } + return true; +} + +// Selectively reads the next proto-buf message from dump-file or string input. +// Returns a bool indicating whether a new message was available. +bool ReadNextMessage(bool use_dump_file, + FILE* dump_input_file, + std::stringstream& input, + webrtc::audioproc::Event& event_msg) { + if (use_dump_file) { + return ReadMessageFromFile(dump_input_file, &event_msg); + } + return ReadMessageFromString(&input, &event_msg); +} + +} // namespace + +AecDumpBasedSimulator::AecDumpBasedSimulator( + const SimulationSettings& settings, + rtc::scoped_refptr audio_processing, + std::unique_ptr ap_builder) + : AudioProcessingSimulator(settings, + std::move(audio_processing), + std::move(ap_builder)) { + MaybeOpenCallOrderFile(); +} + +AecDumpBasedSimulator::~AecDumpBasedSimulator() = default; + +void AecDumpBasedSimulator::PrepareProcessStreamCall( + const webrtc::audioproc::Stream& msg) { + if (msg.has_input_data()) { + // Fixed interface processing. + // Verify interface invariance. + RTC_CHECK(interface_used_ == InterfaceType::kFixedInterface || + interface_used_ == InterfaceType::kNotSpecified); + interface_used_ = InterfaceType::kFixedInterface; + + // Populate input buffer. + RTC_CHECK_EQ(sizeof(fwd_frame_.data[0]) * fwd_frame_.data.size(), + msg.input_data().size()); + memcpy(fwd_frame_.data.data(), msg.input_data().data(), + msg.input_data().size()); + } else { + // Float interface processing. + // Verify interface invariance. + RTC_CHECK(interface_used_ == InterfaceType::kFloatInterface || + interface_used_ == InterfaceType::kNotSpecified); + interface_used_ = InterfaceType::kFloatInterface; + + RTC_CHECK_EQ(in_buf_->num_channels(), + static_cast(msg.input_channel_size())); + + // Populate input buffer. 
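+    // Each channel of the float interface arrives as its own repeated field
+    // of raw bytes; the RTC_CHECK below verifies that the dump's per-channel
+    // byte count matches the simulator's frame size before the memcpy.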
+ for (size_t i = 0; i < in_buf_->num_channels(); ++i) { + RTC_CHECK_EQ(in_buf_->num_frames() * sizeof(*in_buf_->channels()[i]), + msg.input_channel(i).size()); + std::memcpy(in_buf_->channels()[i], msg.input_channel(i).data(), + msg.input_channel(i).size()); + } + } + + if (artificial_nearend_buffer_reader_) { + if (artificial_nearend_buffer_reader_->Read( + artificial_nearend_buf_.get())) { + if (msg.has_input_data()) { + int16_t* fwd_frame_data = fwd_frame_.data.data(); + for (size_t k = 0; k < in_buf_->num_frames(); ++k) { + fwd_frame_data[k] = rtc::saturated_cast( + fwd_frame_data[k] + + static_cast(32767 * + artificial_nearend_buf_->channels()[0][k])); + } + } else { + for (int i = 0; i < msg.input_channel_size(); ++i) { + for (size_t k = 0; k < in_buf_->num_frames(); ++k) { + in_buf_->channels()[i][k] += + artificial_nearend_buf_->channels()[0][k]; + in_buf_->channels()[i][k] = std::min( + 32767.f, std::max(-32768.f, in_buf_->channels()[i][k])); + } + } + } + } else { + if (!artificial_nearend_eof_reported_) { + std::cout << "The artificial nearend file ended before the recording."; + artificial_nearend_eof_reported_ = true; + } + } + } + + if (!settings_.use_stream_delay || *settings_.use_stream_delay) { + if (!settings_.stream_delay) { + if (msg.has_delay()) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->set_stream_delay_ms(msg.delay())); + } + } else { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->set_stream_delay_ms(*settings_.stream_delay)); + } + } + + if (settings_.override_key_pressed.has_value()) { + // Key pressed state overridden. + ap_->set_stream_key_pressed(*settings_.override_key_pressed); + } else { + // Set the recorded key pressed state. + if (msg.has_keypress()) { + ap_->set_stream_key_pressed(msg.keypress()); + } + } + + // Set the applied input level if available. + aec_dump_applied_input_level_ = + msg.has_applied_input_volume() + ? absl::optional(msg.applied_input_volume()) + : absl::nullopt; +} + +void AecDumpBasedSimulator::VerifyProcessStreamBitExactness( + const webrtc::audioproc::Stream& msg) { + if (bitexact_output_) { + if (interface_used_ == InterfaceType::kFixedInterface) { + bitexact_output_ = VerifyFixedBitExactness(msg, fwd_frame_); + } else { + bitexact_output_ = VerifyFloatBitExactness(msg, out_config_, *out_buf_); + } + } +} + +void AecDumpBasedSimulator::PrepareReverseProcessStreamCall( + const webrtc::audioproc::ReverseStream& msg) { + if (msg.has_data()) { + // Fixed interface processing. + // Verify interface invariance. + RTC_CHECK(interface_used_ == InterfaceType::kFixedInterface || + interface_used_ == InterfaceType::kNotSpecified); + interface_used_ = InterfaceType::kFixedInterface; + + // Populate input buffer. + RTC_CHECK_EQ(sizeof(rev_frame_.data[0]) * rev_frame_.data.size(), + msg.data().size()); + memcpy(rev_frame_.data.data(), msg.data().data(), msg.data().size()); + } else { + // Float interface processing. + // Verify interface invariance. + RTC_CHECK(interface_used_ == InterfaceType::kFloatInterface || + interface_used_ == InterfaceType::kNotSpecified); + interface_used_ = InterfaceType::kFloatInterface; + + RTC_CHECK_EQ(reverse_in_buf_->num_channels(), + static_cast(msg.channel_size())); + + // Populate input buffer. 
+ for (int i = 0; i < msg.channel_size(); ++i) { + RTC_CHECK_EQ(reverse_in_buf_->num_frames() * + sizeof(*reverse_in_buf_->channels()[i]), + msg.channel(i).size()); + std::memcpy(reverse_in_buf_->channels()[i], msg.channel(i).data(), + msg.channel(i).size()); + } + } +} + +void AecDumpBasedSimulator::Process() { + ConfigureAudioProcessor(); + + if (settings_.artificial_nearend_filename) { + std::unique_ptr artificial_nearend_file( + new WavReader(settings_.artificial_nearend_filename->c_str())); + + RTC_CHECK_EQ(1, artificial_nearend_file->num_channels()) + << "Only mono files for the artificial nearend are supported, " + "reverted to not using the artificial nearend file"; + + const int sample_rate_hz = artificial_nearend_file->sample_rate(); + artificial_nearend_buffer_reader_.reset( + new ChannelBufferWavReader(std::move(artificial_nearend_file))); + artificial_nearend_buf_.reset(new ChannelBuffer( + rtc::CheckedDivExact(sample_rate_hz, kChunksPerSecond), 1)); + } + + const bool use_dump_file = !settings_.aec_dump_input_string.has_value(); + std::stringstream input; + if (use_dump_file) { + dump_input_file_ = + OpenFile(settings_.aec_dump_input_filename->c_str(), "rb"); + } else { + input << settings_.aec_dump_input_string.value(); + } + + webrtc::audioproc::Event event_msg; + int capture_frames_since_init = 0; + int init_index = 0; + while (ReadNextMessage(use_dump_file, dump_input_file_, input, event_msg)) { + SelectivelyToggleDataDumping(init_index, capture_frames_since_init); + HandleEvent(event_msg, capture_frames_since_init, init_index); + + // Perfom an early exit if the init block to process has been fully + // processed + if (finished_processing_specified_init_block_) { + break; + } + RTC_CHECK(!settings_.init_to_process || + *settings_.init_to_process >= init_index); + } + + if (use_dump_file) { + fclose(dump_input_file_); + } + + DetachAecDump(); +} + +void AecDumpBasedSimulator::Analyze() { + const bool use_dump_file = !settings_.aec_dump_input_string.has_value(); + std::stringstream input; + if (use_dump_file) { + dump_input_file_ = + OpenFile(settings_.aec_dump_input_filename->c_str(), "rb"); + } else { + input << settings_.aec_dump_input_string.value(); + } + + webrtc::audioproc::Event event_msg; + int num_capture_frames = 0; + int num_render_frames = 0; + int init_index = 0; + while (ReadNextMessage(use_dump_file, dump_input_file_, input, event_msg)) { + if (event_msg.type() == webrtc::audioproc::Event::INIT) { + ++init_index; + constexpr float kNumFramesPerSecond = 100.f; + float capture_time_seconds = num_capture_frames / kNumFramesPerSecond; + float render_time_seconds = num_render_frames / kNumFramesPerSecond; + + std::cout << "Inits:" << std::endl; + std::cout << init_index << ": -->" << std::endl; + std::cout << " Time:" << std::endl; + std::cout << " Capture: " << capture_time_seconds << " s (" + << num_capture_frames << " frames) " << std::endl; + std::cout << " Render: " << render_time_seconds << " s (" + << num_render_frames << " frames) " << std::endl; + } else if (event_msg.type() == webrtc::audioproc::Event::STREAM) { + ++num_capture_frames; + } else if (event_msg.type() == webrtc::audioproc::Event::REVERSE_STREAM) { + ++num_render_frames; + } + } + + if (use_dump_file) { + fclose(dump_input_file_); + } +} + +void AecDumpBasedSimulator::HandleEvent( + const webrtc::audioproc::Event& event_msg, + int& capture_frames_since_init, + int& init_index) { + switch (event_msg.type()) { + case webrtc::audioproc::Event::INIT: + RTC_CHECK(event_msg.has_init()); + 
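+      // Each INIT event starts a new segment of the dump: the per-init
+      // capture-frame counter restarts so that a single segment can be
+      // selected for processing (see settings_.init_to_process in Process()).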
+      ++init_index;
+      capture_frames_since_init = 0;
+      HandleMessage(event_msg.init(), init_index);
+      break;
+    case webrtc::audioproc::Event::STREAM:
+      RTC_CHECK(event_msg.has_stream());
+      ++capture_frames_since_init;
+      HandleMessage(event_msg.stream());
+      break;
+    case webrtc::audioproc::Event::REVERSE_STREAM:
+      RTC_CHECK(event_msg.has_reverse_stream());
+      HandleMessage(event_msg.reverse_stream());
+      break;
+    case webrtc::audioproc::Event::CONFIG:
+      RTC_CHECK(event_msg.has_config());
+      HandleMessage(event_msg.config());
+      break;
+    case webrtc::audioproc::Event::RUNTIME_SETTING:
+      HandleMessage(event_msg.runtime_setting());
+      break;
+    case webrtc::audioproc::Event::UNKNOWN_EVENT:
+      RTC_CHECK_NOTREACHED();
+  }
+}
+
+void AecDumpBasedSimulator::HandleMessage(
+    const webrtc::audioproc::Config& msg) {
+  if (settings_.use_verbose_logging) {
+    std::cout << "Config at frame:" << std::endl;
+    std::cout << " Forward: " << get_num_process_stream_calls() << std::endl;
+    std::cout << " Reverse: " << get_num_reverse_process_stream_calls()
+              << std::endl;
+  }
+
+  if (!settings_.discard_all_settings_in_aecdump) {
+    if (settings_.use_verbose_logging) {
+      std::cout << "Settings used in config:" << std::endl;
+    }
+    AudioProcessing::Config apm_config = ap_->GetConfig();
+
+    if (msg.has_aec_enabled() || settings_.use_aec) {
+      bool enable = settings_.use_aec ? *settings_.use_aec : msg.aec_enabled();
+      apm_config.echo_canceller.enabled = enable;
+      if (settings_.use_verbose_logging) {
+        std::cout << " aec_enabled: " << (enable ? "true" : "false")
+                  << std::endl;
+      }
+    }
+
+    if (msg.has_aecm_enabled() || settings_.use_aecm) {
+      bool enable =
+          settings_.use_aecm ? *settings_.use_aecm : msg.aecm_enabled();
+      apm_config.echo_canceller.enabled |= enable;
+      apm_config.echo_canceller.mobile_mode = enable;
+      if (settings_.use_verbose_logging) {
+        std::cout << " aecm_enabled: " << (enable ? "true" : "false")
+                  << std::endl;
+      }
+    }
+
+    if (msg.has_aecm_comfort_noise_enabled() &&
+        msg.aecm_comfort_noise_enabled()) {
+      RTC_LOG(LS_ERROR) << "Ignoring deprecated setting: AECM comfort noise";
+    }
+
+    if (msg.has_aecm_routing_mode() &&
+        static_cast<webrtc::EchoControlMobileImpl::RoutingMode>(
+            msg.aecm_routing_mode()) != EchoControlMobileImpl::kSpeakerphone) {
+      RTC_LOG(LS_ERROR) << "Ignoring deprecated setting: AECM routing mode: "
+                        << msg.aecm_routing_mode();
+    }
+
+    if (msg.has_agc_enabled() || settings_.use_agc) {
+      bool enable = settings_.use_agc ? *settings_.use_agc : msg.agc_enabled();
+      apm_config.gain_controller1.enabled = enable;
+      if (settings_.use_verbose_logging) {
+        std::cout << " agc_enabled: " << (enable ? "true" : "false")
+                  << std::endl;
+      }
+    }
+
+    if (msg.has_agc_mode() || settings_.agc_mode) {
+      int mode = settings_.agc_mode ? *settings_.agc_mode : msg.agc_mode();
+      apm_config.gain_controller1.mode =
+          static_cast<webrtc::AudioProcessing::Config::GainController1::Mode>(
+              mode);
+      if (settings_.use_verbose_logging) {
+        std::cout << " agc_mode: " << mode << std::endl;
+      }
+    }
+
+    if (msg.has_agc_limiter_enabled() || settings_.use_agc_limiter) {
+      bool enable = settings_.use_agc_limiter ? *settings_.use_agc_limiter
+                                              : msg.agc_limiter_enabled();
+      apm_config.gain_controller1.enable_limiter = enable;
+      if (settings_.use_verbose_logging) {
+        std::cout << " agc_limiter_enabled: " << (enable ? "true" : "false")
"true" : "false") + << std::endl; + } + } + + if (settings_.use_agc2) { + bool enable = *settings_.use_agc2; + apm_config.gain_controller2.enabled = enable; + if (settings_.agc2_fixed_gain_db) { + apm_config.gain_controller2.fixed_digital.gain_db = + *settings_.agc2_fixed_gain_db; + } + if (settings_.use_verbose_logging) { + std::cout << " agc2_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_noise_robust_agc_enabled()) { + apm_config.gain_controller1.analog_gain_controller.enabled = + settings_.use_analog_agc ? *settings_.use_analog_agc + : msg.noise_robust_agc_enabled(); + if (settings_.use_verbose_logging) { + std::cout << " noise_robust_agc_enabled: " + << (msg.noise_robust_agc_enabled() ? "true" : "false") + << std::endl; + } + } + + if (msg.has_transient_suppression_enabled() || settings_.use_ts) { + bool enable = settings_.use_ts ? *settings_.use_ts + : msg.transient_suppression_enabled(); + apm_config.transient_suppression.enabled = enable; + if (settings_.use_verbose_logging) { + std::cout << " transient_suppression_enabled: " + << (enable ? "true" : "false") << std::endl; + } + } + + if (msg.has_hpf_enabled() || settings_.use_hpf) { + bool enable = settings_.use_hpf ? *settings_.use_hpf : msg.hpf_enabled(); + apm_config.high_pass_filter.enabled = enable; + if (settings_.use_verbose_logging) { + std::cout << " hpf_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_ns_enabled() || settings_.use_ns) { + bool enable = settings_.use_ns ? *settings_.use_ns : msg.ns_enabled(); + apm_config.noise_suppression.enabled = enable; + if (settings_.use_verbose_logging) { + std::cout << " ns_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_ns_level() || settings_.ns_level) { + int level = settings_.ns_level ? *settings_.ns_level : msg.ns_level(); + apm_config.noise_suppression.level = + static_cast(level); + if (settings_.use_verbose_logging) { + std::cout << " ns_level: " << level << std::endl; + } + } + + if (msg.has_pre_amplifier_enabled() || settings_.use_pre_amplifier) { + const bool enable = settings_.use_pre_amplifier + ? *settings_.use_pre_amplifier + : msg.pre_amplifier_enabled(); + apm_config.pre_amplifier.enabled = enable; + } + + if (msg.has_pre_amplifier_fixed_gain_factor() || + settings_.pre_amplifier_gain_factor) { + const float gain = settings_.pre_amplifier_gain_factor + ? 
*settings_.pre_amplifier_gain_factor + : msg.pre_amplifier_fixed_gain_factor(); + apm_config.pre_amplifier.fixed_gain_factor = gain; + } + + if (settings_.use_verbose_logging && msg.has_experiments_description() && + !msg.experiments_description().empty()) { + std::cout << " experiments not included by default in the simulation: " + << msg.experiments_description() << std::endl; + } + + ap_->ApplyConfig(apm_config); + } +} + +void AecDumpBasedSimulator::HandleMessage(const webrtc::audioproc::Init& msg, + int init_index) { + RTC_CHECK(msg.has_sample_rate()); + RTC_CHECK(msg.has_num_input_channels()); + RTC_CHECK(msg.has_num_reverse_channels()); + RTC_CHECK(msg.has_reverse_sample_rate()); + + // Do not perform the init if the init block to process is fully processed + if (settings_.init_to_process && *settings_.init_to_process < init_index) { + finished_processing_specified_init_block_ = true; + } + + MaybeOpenCallOrderFile(); + + if (settings_.use_verbose_logging) { + std::cout << "Init at frame:" << std::endl; + std::cout << " Forward: " << get_num_process_stream_calls() << std::endl; + std::cout << " Reverse: " << get_num_reverse_process_stream_calls() + << std::endl; + } + + int num_output_channels; + if (settings_.output_num_channels) { + num_output_channels = *settings_.output_num_channels; + } else { + num_output_channels = msg.has_num_output_channels() + ? msg.num_output_channels() + : msg.num_input_channels(); + } + + int output_sample_rate; + if (settings_.output_sample_rate_hz) { + output_sample_rate = *settings_.output_sample_rate_hz; + } else { + output_sample_rate = msg.has_output_sample_rate() ? msg.output_sample_rate() + : msg.sample_rate(); + } + + int num_reverse_output_channels; + if (settings_.reverse_output_num_channels) { + num_reverse_output_channels = *settings_.reverse_output_num_channels; + } else { + num_reverse_output_channels = msg.has_num_reverse_output_channels() + ? msg.num_reverse_output_channels() + : msg.num_reverse_channels(); + } + + int reverse_output_sample_rate; + if (settings_.reverse_output_sample_rate_hz) { + reverse_output_sample_rate = *settings_.reverse_output_sample_rate_hz; + } else { + reverse_output_sample_rate = msg.has_reverse_output_sample_rate() + ? msg.reverse_output_sample_rate() + : msg.reverse_sample_rate(); + } + + SetupBuffersConfigsOutputs( + msg.sample_rate(), output_sample_rate, msg.reverse_sample_rate(), + reverse_output_sample_rate, msg.num_input_channels(), num_output_channels, + msg.num_reverse_channels(), num_reverse_output_channels); +} + +void AecDumpBasedSimulator::HandleMessage( + const webrtc::audioproc::Stream& msg) { + if (call_order_output_file_) { + *call_order_output_file_ << "c"; + } + PrepareProcessStreamCall(msg); + ProcessStream(interface_used_ == InterfaceType::kFixedInterface); + VerifyProcessStreamBitExactness(msg); +} + +void AecDumpBasedSimulator::HandleMessage( + const webrtc::audioproc::ReverseStream& msg) { + if (call_order_output_file_) { + *call_order_output_file_ << "r"; + } + PrepareReverseProcessStreamCall(msg); + ProcessReverseStream(interface_used_ == InterfaceType::kFixedInterface); +} + +void AecDumpBasedSimulator::HandleMessage( + const webrtc::audioproc::RuntimeSetting& msg) { + RTC_CHECK(ap_.get()); + if (msg.has_capture_pre_gain()) { + // Handle capture pre-gain runtime setting only if not overridden. 
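+    // "Overridden" here means forced from the simulator settings: when the
+    // user has pinned the pre-amplifier or capture-level-adjustment gain,
+    // the gain recorded in the dump is dropped rather than forwarded to APM.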
+    const bool pre_amplifier_gain_not_overridden =
+        (!settings_.use_pre_amplifier || *settings_.use_pre_amplifier) &&
+        !settings_.pre_amplifier_gain_factor;
+    const bool capture_level_adjustment_gain_not_overridden =
+        (!settings_.use_capture_level_adjustment ||
+         *settings_.use_capture_level_adjustment) &&
+        !settings_.pre_gain_factor;
+    if (pre_amplifier_gain_not_overridden ||
+        capture_level_adjustment_gain_not_overridden) {
+      ap_->SetRuntimeSetting(
+          AudioProcessing::RuntimeSetting::CreateCapturePreGain(
+              msg.capture_pre_gain()));
+    }
+  } else if (msg.has_capture_post_gain()) {
+    // Handle capture post-gain runtime setting only if not overridden.
+    if ((!settings_.use_capture_level_adjustment ||
+         *settings_.use_capture_level_adjustment) &&
+        !settings_.post_gain_factor) {
+      ap_->SetRuntimeSetting(
+          AudioProcessing::RuntimeSetting::CreateCapturePostGain(
+              msg.capture_post_gain()));
+    }
+  } else if (msg.has_capture_fixed_post_gain()) {
+    // Handle capture fixed-post-gain runtime setting only if not overridden.
+    if ((!settings_.use_agc2 || *settings_.use_agc2) &&
+        !settings_.agc2_fixed_gain_db) {
+      ap_->SetRuntimeSetting(
+          AudioProcessing::RuntimeSetting::CreateCaptureFixedPostGain(
+              msg.capture_fixed_post_gain()));
+    }
+  } else if (msg.has_playout_volume_change()) {
+    ap_->SetRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(
+            msg.playout_volume_change()));
+  } else if (msg.has_playout_audio_device_change()) {
+    ap_->SetRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreatePlayoutAudioDeviceChange(
+            {msg.playout_audio_device_change().id(),
+             msg.playout_audio_device_change().max_volume()}));
+  } else if (msg.has_capture_output_used()) {
+    ap_->SetRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+            msg.capture_output_used()));
+  }
+}
+
+void AecDumpBasedSimulator::MaybeOpenCallOrderFile() {
+  if (settings_.call_order_output_filename.has_value()) {
+    const std::string filename = settings_.store_intermediate_output
+                                     ? *settings_.call_order_output_filename +
+                                           "_" +
+                                           std::to_string(output_reset_counter_)
+                                     : *settings_.call_order_output_filename;
+    call_order_output_file_ = std::make_unique<std::ofstream>(filename);
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.h b/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.h
new file mode 100644
index 0000000000..e2c1f3e4ba
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/aec_dump_based_simulator.h
@@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_AEC_DUMP_BASED_SIMULATOR_H_
+#define MODULES_AUDIO_PROCESSING_TEST_AEC_DUMP_BASED_SIMULATOR_H_
+
+#include <fstream>
+#include <memory>
+
+#include "modules/audio_processing/test/audio_processing_simulator.h"
+#include "rtc_base/ignore_wundef.h"
+
+RTC_PUSH_IGNORING_WUNDEF()
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
+#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"
+#else
+#include "modules/audio_processing/debug.pb.h"
+#endif
+RTC_POP_IGNORING_WUNDEF()
+
+namespace webrtc {
+namespace test {
+
+// Used to perform an audio processing simulation from an aec dump.
+class AecDumpBasedSimulator final : public AudioProcessingSimulator {
+ public:
+  AecDumpBasedSimulator(const SimulationSettings& settings,
+                        rtc::scoped_refptr<AudioProcessing> audio_processing,
+                        std::unique_ptr<AudioProcessingBuilder> ap_builder);
+
+  AecDumpBasedSimulator() = delete;
+  AecDumpBasedSimulator(const AecDumpBasedSimulator&) = delete;
+  AecDumpBasedSimulator& operator=(const AecDumpBasedSimulator&) = delete;
+
+  ~AecDumpBasedSimulator() override;
+
+  // Processes the messages in the aecdump file.
+  void Process() override;
+
+  // Analyzes the data in the aecdump file and reports the resulting
+  // statistics.
+  void Analyze() override;
+
+ private:
+  void HandleEvent(const webrtc::audioproc::Event& event_msg,
+                   int& num_forward_chunks_processed,
+                   int& init_index);
+  void HandleMessage(const webrtc::audioproc::Init& msg, int init_index);
+  void HandleMessage(const webrtc::audioproc::Stream& msg);
+  void HandleMessage(const webrtc::audioproc::ReverseStream& msg);
+  void HandleMessage(const webrtc::audioproc::Config& msg);
+  void HandleMessage(const webrtc::audioproc::RuntimeSetting& msg);
+  void PrepareProcessStreamCall(const webrtc::audioproc::Stream& msg);
+  void PrepareReverseProcessStreamCall(
+      const webrtc::audioproc::ReverseStream& msg);
+  void VerifyProcessStreamBitExactness(const webrtc::audioproc::Stream& msg);
+  void MaybeOpenCallOrderFile();
+  enum InterfaceType {
+    kFixedInterface,
+    kFloatInterface,
+    kNotSpecified,
+  };
+
+  FILE* dump_input_file_;
+  std::unique_ptr<ChannelBuffer<float>> artificial_nearend_buf_;
+  std::unique_ptr<ChannelBufferWavReader> artificial_nearend_buffer_reader_;
+  bool artificial_nearend_eof_reported_ = false;
+  InterfaceType interface_used_ = InterfaceType::kNotSpecified;
+  std::unique_ptr<std::ofstream> call_order_output_file_;
+  bool finished_processing_specified_init_block_ = false;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_AEC_DUMP_BASED_SIMULATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml
new file mode 100644
index 0000000000..c6063b3d76
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml
@@ -0,0 +1,30 @@
+<!-- The original 30-line manifest is unrecoverable here: its XML markup was
+     stripped in extraction. -->
diff --git a/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/default.properties b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/default.properties
new file mode 100644
index 0000000000..9a2c9f6c88
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/default.properties
@@ -0,0 +1,11 @@
+# This file is automatically generated by Android Tools.
+# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
+#
+# This file must be checked in Version Control Systems.
+#
+# To customize properties used by the Ant build system, use
+# "build.properties", and override values to adapt the script to your
+# project structure.
+
+# Project target.
+target=android-9
diff --git a/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/jni/main.c b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/jni/main.c
new file mode 100644
index 0000000000..2e19635683
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/jni/main.c
@@ -0,0 +1,307 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+//BEGIN_INCLUDE(all)
+#include <jni.h>
+#include <errno.h>
+
+#include <EGL/egl.h>
+#include <GLES/gl.h>
+
+#include <android/sensor.h>
+#include <android/log.h>
+#include <android_native_app_glue.h>
+
+#define LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, "native-activity", __VA_ARGS__))
+#define LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, "native-activity", __VA_ARGS__))
+
+/**
+ * Our saved state data.
+ */
+struct saved_state {
+    float angle;
+    int32_t x;
+    int32_t y;
+};
+
+/**
+ * Shared state for our app.
+ */
+struct engine {
+    struct android_app* app;
+
+    ASensorManager* sensorManager;
+    const ASensor* accelerometerSensor;
+    ASensorEventQueue* sensorEventQueue;
+
+    int animating;
+    EGLDisplay display;
+    EGLSurface surface;
+    EGLContext context;
+    int32_t width;
+    int32_t height;
+    struct saved_state state;
+};
+
+/**
+ * Initialize an EGL context for the current display.
+ */
+static int engine_init_display(struct engine* engine) {
+    // initialize OpenGL ES and EGL
+
+    /*
+     * Here specify the attributes of the desired configuration.
+     * Below, we select an EGLConfig with at least 8 bits per color
+     * component compatible with on-screen windows
+     */
+    const EGLint attribs[] = {
+            EGL_SURFACE_TYPE, EGL_WINDOW_BIT,
+            EGL_BLUE_SIZE, 8,
+            EGL_GREEN_SIZE, 8,
+            EGL_RED_SIZE, 8,
+            EGL_NONE
+    };
+    EGLint w, h, dummy, format;
+    EGLint numConfigs;
+    EGLConfig config;
+    EGLSurface surface;
+    EGLContext context;
+
+    EGLDisplay display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
+
+    eglInitialize(display, 0, 0);
+
+    /* Here, the application chooses the configuration it desires. In this
+     * sample, we have a very simplified selection process, where we pick
+     * the first EGLConfig that matches our criteria */
+    eglChooseConfig(display, attribs, &config, 1, &numConfigs);
+
+    /* EGL_NATIVE_VISUAL_ID is an attribute of the EGLConfig that is
+     * guaranteed to be accepted by ANativeWindow_setBuffersGeometry().
+     * As soon as we pick an EGLConfig, we can safely reconfigure the
+     * ANativeWindow buffers to match, using EGL_NATIVE_VISUAL_ID.
*/ + eglGetConfigAttrib(display, config, EGL_NATIVE_VISUAL_ID, &format); + + ANativeWindow_setBuffersGeometry(engine->app->window, 0, 0, format); + + surface = eglCreateWindowSurface(display, config, engine->app->window, NULL); + context = eglCreateContext(display, config, NULL, NULL); + + if (eglMakeCurrent(display, surface, surface, context) == EGL_FALSE) { + LOGW("Unable to eglMakeCurrent"); + return -1; + } + + eglQuerySurface(display, surface, EGL_WIDTH, &w); + eglQuerySurface(display, surface, EGL_HEIGHT, &h); + + engine->display = display; + engine->context = context; + engine->surface = surface; + engine->width = w; + engine->height = h; + engine->state.angle = 0; + + // Initialize GL state. + glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_FASTEST); + glEnable(GL_CULL_FACE); + glShadeModel(GL_SMOOTH); + glDisable(GL_DEPTH_TEST); + + return 0; +} + +/** + * Just the current frame in the display. + */ +static void engine_draw_frame(struct engine* engine) { + if (engine->display == NULL) { + // No display. + return; + } + + // Just fill the screen with a color. + glClearColor(((float)engine->state.x)/engine->width, engine->state.angle, + ((float)engine->state.y)/engine->height, 1); + glClear(GL_COLOR_BUFFER_BIT); + + eglSwapBuffers(engine->display, engine->surface); +} + +/** + * Tear down the EGL context currently associated with the display. + */ +static void engine_term_display(struct engine* engine) { + if (engine->display != EGL_NO_DISPLAY) { + eglMakeCurrent(engine->display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + if (engine->context != EGL_NO_CONTEXT) { + eglDestroyContext(engine->display, engine->context); + } + if (engine->surface != EGL_NO_SURFACE) { + eglDestroySurface(engine->display, engine->surface); + } + eglTerminate(engine->display); + } + engine->animating = 0; + engine->display = EGL_NO_DISPLAY; + engine->context = EGL_NO_CONTEXT; + engine->surface = EGL_NO_SURFACE; +} + +/** + * Process the next input event. + */ +static int32_t engine_handle_input(struct android_app* app, AInputEvent* event) { + struct engine* engine = (struct engine*)app->userData; + if (AInputEvent_getType(event) == AINPUT_EVENT_TYPE_MOTION) { + engine->animating = 1; + engine->state.x = AMotionEvent_getX(event, 0); + engine->state.y = AMotionEvent_getY(event, 0); + return 1; + } + return 0; +} + +/** + * Process the next main command. + */ +static void engine_handle_cmd(struct android_app* app, int32_t cmd) { + struct engine* engine = (struct engine*)app->userData; + switch (cmd) { + case APP_CMD_SAVE_STATE: + // The system has asked us to save our current state. Do so. + engine->app->savedState = malloc(sizeof(struct saved_state)); + *((struct saved_state*)engine->app->savedState) = engine->state; + engine->app->savedStateSize = sizeof(struct saved_state); + break; + case APP_CMD_INIT_WINDOW: + // The window is being shown, get it ready. + if (engine->app->window != NULL) { + engine_init_display(engine); + engine_draw_frame(engine); + } + break; + case APP_CMD_TERM_WINDOW: + // The window is being hidden or closed, clean it up. + engine_term_display(engine); + break; + case APP_CMD_GAINED_FOCUS: + // When our app gains focus, we start monitoring the accelerometer. + if (engine->accelerometerSensor != NULL) { + ASensorEventQueue_enableSensor(engine->sensorEventQueue, + engine->accelerometerSensor); + // We'd like to get 60 events per second (in us). 
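+                // Note: the rate argument is a period in microseconds, and
+                // integer division makes (1000L/60)*1000 = 16000 us, i.e.
+                // 62.5 events per second rather than exactly 60.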
+ ASensorEventQueue_setEventRate(engine->sensorEventQueue, + engine->accelerometerSensor, (1000L/60)*1000); + } + break; + case APP_CMD_LOST_FOCUS: + // When our app loses focus, we stop monitoring the accelerometer. + // This is to avoid consuming battery while not being used. + if (engine->accelerometerSensor != NULL) { + ASensorEventQueue_disableSensor(engine->sensorEventQueue, + engine->accelerometerSensor); + } + // Also stop animating. + engine->animating = 0; + engine_draw_frame(engine); + break; + } +} + +/** + * This is the main entry point of a native application that is using + * android_native_app_glue. It runs in its own thread, with its own + * event loop for receiving input events and doing other things. + */ +void android_main(struct android_app* state) { + struct engine engine; + + // Make sure glue isn't stripped. + app_dummy(); + + memset(&engine, 0, sizeof(engine)); + state->userData = &engine; + state->onAppCmd = engine_handle_cmd; + state->onInputEvent = engine_handle_input; + engine.app = state; + + // Prepare to monitor accelerometer + engine.sensorManager = ASensorManager_getInstance(); + engine.accelerometerSensor = ASensorManager_getDefaultSensor(engine.sensorManager, + ASENSOR_TYPE_ACCELEROMETER); + engine.sensorEventQueue = ASensorManager_createEventQueue(engine.sensorManager, + state->looper, LOOPER_ID_USER, NULL, NULL); + + if (state->savedState != NULL) { + // We are starting with a previous saved state; restore from it. + engine.state = *(struct saved_state*)state->savedState; + } + + // loop waiting for stuff to do. + + while (1) { + // Read all pending events. + int ident; + int events; + struct android_poll_source* source; + + // If not animating, we will block forever waiting for events. + // If animating, we loop until all events are read, then continue + // to draw the next frame of animation. + while ((ident=ALooper_pollAll(engine.animating ? 0 : -1, NULL, &events, + (void**)&source)) >= 0) { + + // Process this event. + if (source != NULL) { + source->process(state, source); + } + + // If a sensor has data, process it now. + if (ident == LOOPER_ID_USER) { + if (engine.accelerometerSensor != NULL) { + ASensorEvent event; + while (ASensorEventQueue_getEvents(engine.sensorEventQueue, + &event, 1) > 0) { + LOGI("accelerometer: x=%f y=%f z=%f", + event.acceleration.x, event.acceleration.y, + event.acceleration.z); + } + } + } + + // Check if we are exiting. + if (state->destroyRequested != 0) { + engine_term_display(&engine); + return; + } + } + + if (engine.animating) { + // Done with events; draw next animation frame. + engine.state.angle += .01f; + if (engine.state.angle > 1) { + engine.state.angle = 0; + } + + // Drawing is throttled to the screen update rate, so there + // is no need to do timing here. 
+            engine_draw_frame(&engine);
+        }
+    }
+}
+//END_INCLUDE(all)
diff --git a/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml
new file mode 100644
index 0000000000..d0bd0f3051
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+  <string name="app_name">apmtest</string>
+</resources>
diff --git a/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.cc b/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.cc
new file mode 100644
index 0000000000..ee8a308596
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.cc
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/api_call_statistics.h"
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/time_utils.h"
+
+namespace webrtc {
+namespace test {
+
+void ApiCallStatistics::Add(int64_t duration_nanos, CallType call_type) {
+  calls_.push_back(CallData(duration_nanos, call_type));
+}
+
+void ApiCallStatistics::PrintReport() const {
+  int64_t min_render = std::numeric_limits<int64_t>::max();
+  int64_t min_capture = std::numeric_limits<int64_t>::max();
+  int64_t max_render = 0;
+  int64_t max_capture = 0;
+  int64_t sum_render = 0;
+  int64_t sum_capture = 0;
+  int64_t num_render = 0;
+  int64_t num_capture = 0;
+  int64_t avg_render = 0;
+  int64_t avg_capture = 0;
+
+  for (auto v : calls_) {
+    if (v.call_type == CallType::kRender) {
+      ++num_render;
+      min_render = std::min(min_render, v.duration_nanos);
+      max_render = std::max(max_render, v.duration_nanos);
+      sum_render += v.duration_nanos;
+    } else {
+      ++num_capture;
+      min_capture = std::min(min_capture, v.duration_nanos);
+      max_capture = std::max(max_capture, v.duration_nanos);
+      sum_capture += v.duration_nanos;
+    }
+  }
+  min_render /= rtc::kNumNanosecsPerMicrosec;
+  max_render /= rtc::kNumNanosecsPerMicrosec;
+  sum_render /= rtc::kNumNanosecsPerMicrosec;
+  min_capture /= rtc::kNumNanosecsPerMicrosec;
+  max_capture /= rtc::kNumNanosecsPerMicrosec;
+  sum_capture /= rtc::kNumNanosecsPerMicrosec;
+  avg_render = num_render > 0 ? sum_render / num_render : 0;
+  avg_capture = num_capture > 0 ? sum_capture / num_capture : 0;
+
+  std::cout << std::endl
+            << "Total time: " << (sum_capture + sum_render) * 1e-6 << " s"
+            << std::endl
+            << " Render API calls:" << std::endl
+            << "   min: " << min_render << " us" << std::endl
+            << "   max: " << max_render << " us" << std::endl
+            << "   avg: " << avg_render << " us" << std::endl
+            << " Capture API calls:" << std::endl
+            << "   min: " << min_capture << " us" << std::endl
+            << "   max: " << max_capture << " us" << std::endl
+            << "   avg: " << avg_capture << " us" << std::endl;
+}
+
+void ApiCallStatistics::WriteReportToFile(absl::string_view filename) const {
+  std::unique_ptr<std::ofstream> out =
+      std::make_unique<std::ofstream>(std::string(filename));
+  for (auto v : calls_) {
+    if (v.call_type == CallType::kRender) {
+      *out << "render, ";
+    } else {
+      *out << "capture, ";
+    }
+    *out << (v.duration_nanos / rtc::kNumNanosecsPerMicrosec) << std::endl;
+  }
+}
+
+ApiCallStatistics::CallData::CallData(int64_t duration_nanos,
+                                      CallType call_type)
+    : duration_nanos(duration_nanos), call_type(call_type) {}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.h b/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.h
new file mode 100644
index 0000000000..8fced104f9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/api_call_statistics.h
@@ -0,0 +1,47 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_API_CALL_STATISTICS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_API_CALL_STATISTICS_H_
+
+#include <cstdint>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+namespace test {
+
+// Collects statistics about the API call durations.
+class ApiCallStatistics {
+ public:
+  enum class CallType { kRender, kCapture };
+
+  // Adds a new datapoint.
+  void Add(int64_t duration_nanos, CallType call_type);
+
+  // Prints out a report of the statistics.
+  void PrintReport() const;
+
+  // Writes the call information to a file.
+  void WriteReportToFile(absl::string_view filename) const;
+
+ private:
+  struct CallData {
+    CallData(int64_t duration_nanos, CallType call_type);
+    int64_t duration_nanos;
+    CallType call_type;
+  };
+  std::vector<CallData> calls_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_API_CALL_STATISTICS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/apmtest.m b/third_party/libwebrtc/modules/audio_processing/test/apmtest.m
new file mode 100644
index 0000000000..1c8183c3ec
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/apmtest.m
@@ -0,0 +1,365 @@
+%
+% Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+%
+% Use of this source code is governed by a BSD-style license
+% that can be found in the LICENSE file in the root of the source
+% tree. An additional intellectual property rights grant can be found
+% in the file PATENTS.  All contributing project authors may
+% be found in the AUTHORS file in the root of the source tree.
+%
+
+function apmtest(task, testname, filepath, casenumber, legacy)
+%APMTEST is a tool to process APM file sets and easily display the output.
+% APMTEST(TASK, TESTNAME, CASENUMBER) performs one of several TASKs: +% 'test' Processes the files to produce test output. +% 'list' Prints a list of cases in the test set, preceded by their +% CASENUMBERs. +% 'show' Uses spclab to show the test case specified by the +% CASENUMBER parameter. +% +% using a set of test files determined by TESTNAME: +% 'all' All tests. +% 'apm' The standard APM test set (default). +% 'apmm' The mobile APM test set. +% 'aec' The AEC test set. +% 'aecm' The AECM test set. +% 'agc' The AGC test set. +% 'ns' The NS test set. +% 'vad' The VAD test set. +% +% FILEPATH specifies the path to the test data files. +% +% CASENUMBER can be used to select a single test case. Omit CASENUMBER, +% or set to zero, to use all test cases. +% + +if nargin < 5 || isempty(legacy) + % Set to true to run old VQE recordings. + legacy = false; +end + +if nargin < 4 || isempty(casenumber) + casenumber = 0; +end + +if nargin < 3 || isempty(filepath) + filepath = 'data/'; +end + +if nargin < 2 || isempty(testname) + testname = 'all'; +end + +if nargin < 1 || isempty(task) + task = 'test'; +end + +if ~strcmp(task, 'test') && ~strcmp(task, 'list') && ~strcmp(task, 'show') + error(['TASK ' task ' is not recognized']); +end + +if casenumber == 0 && strcmp(task, 'show') + error(['CASENUMBER must be specified for TASK ' task]); +end + +inpath = [filepath 'input/']; +outpath = [filepath 'output/']; +refpath = [filepath 'reference/']; + +if strcmp(testname, 'all') + tests = {'apm','apmm','aec','aecm','agc','ns','vad'}; +else + tests = {testname}; +end + +if legacy + progname = './test'; +else + progname = './process_test'; +end + +global farFile; +global nearFile; +global eventFile; +global delayFile; +global driftFile; + +if legacy + farFile = 'vqeFar.pcm'; + nearFile = 'vqeNear.pcm'; + eventFile = 'vqeEvent.dat'; + delayFile = 'vqeBuf.dat'; + driftFile = 'vqeDrift.dat'; +else + farFile = 'apm_far.pcm'; + nearFile = 'apm_near.pcm'; + eventFile = 'apm_event.dat'; + delayFile = 'apm_delay.dat'; + driftFile = 'apm_drift.dat'; +end + +simulateMode = false; +nErr = 0; +nCases = 0; +for i=1:length(tests) + simulateMode = false; + + if strcmp(tests{i}, 'apm') + testdir = ['apm/']; + outfile = ['out']; + if legacy + opt = ['-ec 1 -agc 2 -nc 2 -vad 3']; + else + opt = ['--no_progress -hpf' ... + ' -aec --drift_compensation -agc --fixed_digital' ... + ' -ns --ns_moderate -vad']; + end + + elseif strcmp(tests{i}, 'apm-swb') + simulateMode = true; + testdir = ['apm-swb/']; + outfile = ['out']; + if legacy + opt = ['-fs 32000 -ec 1 -agc 2 -nc 2']; + else + opt = ['--no_progress -fs 32000 -hpf' ... + ' -aec --drift_compensation -agc --adaptive_digital' ... + ' -ns --ns_moderate -vad']; + end + elseif strcmp(tests{i}, 'apmm') + testdir = ['apmm/']; + outfile = ['out']; + opt = ['-aec --drift_compensation -agc --fixed_digital -hpf -ns ' ... + '--ns_moderate']; + + else + error(['TESTNAME ' tests{i} ' is not recognized']); + end + + inpathtest = [inpath testdir]; + outpathtest = [outpath testdir]; + refpathtest = [refpath testdir]; + + if ~exist(inpathtest,'dir') + error(['Input directory ' inpathtest ' does not exist']); + end + + if ~exist(refpathtest,'dir') + warning(['Reference directory ' refpathtest ' does not exist']); + end + + [status, errMsg] = mkdir(outpathtest); + if (status == 0) + error(errMsg); + end + + [nErr, nCases] = recurseDir(inpathtest, outpathtest, refpathtest, outfile, ... 
+ progname, opt, simulateMode, nErr, nCases, task, casenumber, legacy); + + if strcmp(task, 'test') || strcmp(task, 'show') + system(['rm ' farFile]); + system(['rm ' nearFile]); + if simulateMode == false + system(['rm ' eventFile]); + system(['rm ' delayFile]); + system(['rm ' driftFile]); + end + end +end + +if ~strcmp(task, 'list') + if nErr == 0 + fprintf(1, '\nAll files are bit-exact to reference\n', nErr); + else + fprintf(1, '\n%d files are NOT bit-exact to reference\n', nErr); + end +end + + +function [nErrOut, nCases] = recurseDir(inpath, outpath, refpath, ... + outfile, progname, opt, simulateMode, nErr, nCases, task, casenumber, ... + legacy) + +global farFile; +global nearFile; +global eventFile; +global delayFile; +global driftFile; + +dirs = dir(inpath); +nDirs = 0; +nErrOut = nErr; +for i=3:length(dirs) % skip . and .. + nDirs = nDirs + dirs(i).isdir; +end + + +if nDirs == 0 + nCases = nCases + 1; + + if casenumber == nCases || casenumber == 0 + + if strcmp(task, 'list') + fprintf([num2str(nCases) '. ' outfile '\n']) + else + vadoutfile = ['vad_' outfile '.dat']; + outfile = [outfile '.pcm']; + + % Check for VAD test + vadTest = 0; + if ~isempty(findstr(opt, '-vad')) + vadTest = 1; + if legacy + opt = [opt ' ' outpath vadoutfile]; + else + opt = [opt ' --vad_out_file ' outpath vadoutfile]; + end + end + + if exist([inpath 'vqeFar.pcm']) + system(['ln -s -f ' inpath 'vqeFar.pcm ' farFile]); + elseif exist([inpath 'apm_far.pcm']) + system(['ln -s -f ' inpath 'apm_far.pcm ' farFile]); + end + + if exist([inpath 'vqeNear.pcm']) + system(['ln -s -f ' inpath 'vqeNear.pcm ' nearFile]); + elseif exist([inpath 'apm_near.pcm']) + system(['ln -s -f ' inpath 'apm_near.pcm ' nearFile]); + end + + if exist([inpath 'vqeEvent.dat']) + system(['ln -s -f ' inpath 'vqeEvent.dat ' eventFile]); + elseif exist([inpath 'apm_event.dat']) + system(['ln -s -f ' inpath 'apm_event.dat ' eventFile]); + end + + if exist([inpath 'vqeBuf.dat']) + system(['ln -s -f ' inpath 'vqeBuf.dat ' delayFile]); + elseif exist([inpath 'apm_delay.dat']) + system(['ln -s -f ' inpath 'apm_delay.dat ' delayFile]); + end + + if exist([inpath 'vqeSkew.dat']) + system(['ln -s -f ' inpath 'vqeSkew.dat ' driftFile]); + elseif exist([inpath 'vqeDrift.dat']) + system(['ln -s -f ' inpath 'vqeDrift.dat ' driftFile]); + elseif exist([inpath 'apm_drift.dat']) + system(['ln -s -f ' inpath 'apm_drift.dat ' driftFile]); + end + + if simulateMode == false + command = [progname ' -o ' outpath outfile ' ' opt]; + else + if legacy + inputCmd = [' -in ' nearFile]; + else + inputCmd = [' -i ' nearFile]; + end + + if exist([farFile]) + if legacy + inputCmd = [' -if ' farFile inputCmd]; + else + inputCmd = [' -ir ' farFile inputCmd]; + end + end + command = [progname inputCmd ' -o ' outpath outfile ' ' opt]; + end + % This prevents MATLAB from using its own C libraries. + shellcmd = ['bash -c "unset LD_LIBRARY_PATH;']; + fprintf([command '\n']); + [status, result] = system([shellcmd command '"']); + fprintf(result); + + fprintf(['Reference file: ' refpath outfile '\n']); + + if vadTest == 1 + equal_to_ref = are_files_equal([outpath vadoutfile], ... + [refpath vadoutfile], ... + 'int8'); + if ~equal_to_ref + nErr = nErr + 1; + end + end + + [equal_to_ref, diffvector] = are_files_equal([outpath outfile], ... + [refpath outfile], ... + 'int16'); + if ~equal_to_ref + nErr = nErr + 1; + end + + if strcmp(task, 'show') + % Assume the last init gives the sample rate of interest. 
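+        % The binary's output contains lines like "Sample rate: 16000"; the
+        % last occurrence is taken, and the five characters after the label
+        % are parsed as the rate.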
+ str_idx = strfind(result, 'Sample rate:'); + fs = str2num(result(str_idx(end) + 13:str_idx(end) + 17)); + fprintf('Using %d Hz\n', fs); + + if exist([farFile]) + spclab(fs, farFile, nearFile, [refpath outfile], ... + [outpath outfile], diffvector); + %spclab(fs, diffvector); + else + spclab(fs, nearFile, [refpath outfile], [outpath outfile], ... + diffvector); + %spclab(fs, diffvector); + end + end + end + end +else + + for i=3:length(dirs) + if dirs(i).isdir + [nErr, nCases] = recurseDir([inpath dirs(i).name '/'], outpath, ... + refpath,[outfile '_' dirs(i).name], progname, opt, ... + simulateMode, nErr, nCases, task, casenumber, legacy); + end + end +end +nErrOut = nErr; + +function [are_equal, diffvector] = ... + are_files_equal(newfile, reffile, precision, diffvector) + +are_equal = false; +diffvector = 0; +if ~exist(newfile,'file') + warning(['Output file ' newfile ' does not exist']); + return +end + +if ~exist(reffile,'file') + warning(['Reference file ' reffile ' does not exist']); + return +end + +fid = fopen(newfile,'rb'); +new = fread(fid,inf,precision); +fclose(fid); + +fid = fopen(reffile,'rb'); +ref = fread(fid,inf,precision); +fclose(fid); + +if length(new) ~= length(ref) + warning('Reference is not the same length as output'); + minlength = min(length(new), length(ref)); + new = new(1:minlength); + ref = ref(1:minlength); +end +diffvector = new - ref; + +if isequal(new, ref) + fprintf([newfile ' is bit-exact to reference\n']); + are_equal = true; +else + if isempty(new) + warning([newfile ' is empty']); + return + end + snr = snrseg(new,ref,80); + fprintf('\n'); + are_equal = false; +end diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.cc b/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.cc new file mode 100644 index 0000000000..64fb9c7ab1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+
+#include <string.h>
+
+namespace webrtc {
+namespace test {
+
+void SetupFrame(const StreamConfig& stream_config,
+                std::vector<float*>* frame,
+                std::vector<float>* frame_samples) {
+  frame_samples->resize(stream_config.num_channels() *
+                        stream_config.num_frames());
+  frame->resize(stream_config.num_channels());
+  for (size_t ch = 0; ch < stream_config.num_channels(); ++ch) {
+    (*frame)[ch] = &(*frame_samples)[ch * stream_config.num_frames()];
+  }
+}
+
+void CopyVectorToAudioBuffer(const StreamConfig& stream_config,
+                             rtc::ArrayView<const float> source,
+                             AudioBuffer* destination) {
+  std::vector<float*> input;
+  std::vector<float> input_samples;
+
+  SetupFrame(stream_config, &input, &input_samples);
+
+  RTC_CHECK_EQ(input_samples.size(), source.size());
+  memcpy(input_samples.data(), source.data(),
+         source.size() * sizeof(source[0]));
+
+  destination->CopyFrom(&input[0], stream_config);
+}
+
+void ExtractVectorFromAudioBuffer(const StreamConfig& stream_config,
+                                  AudioBuffer* source,
+                                  std::vector<float>* destination) {
+  std::vector<float*> output;
+
+  SetupFrame(stream_config, &output, destination);
+
+  source->CopyTo(stream_config, &output[0]);
+}
+
+void FillBuffer(float value, AudioBuffer& audio_buffer) {
+  for (size_t ch = 0; ch < audio_buffer.num_channels(); ++ch) {
+    FillBufferChannel(value, ch, audio_buffer);
+  }
+}
+
+void FillBufferChannel(float value, int channel, AudioBuffer& audio_buffer) {
+  RTC_CHECK_LT(channel, audio_buffer.num_channels());
+  for (size_t i = 0; i < audio_buffer.num_frames(); ++i) {
+    audio_buffer.channels()[channel][i] = value;
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.h b/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.h
new file mode 100644
index 0000000000..faac4bf9ff
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_buffer_tools.h
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIO_BUFFER_TOOLS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_AUDIO_BUFFER_TOOLS_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+namespace test {
+
+// Copies a vector into an audiobuffer.
+void CopyVectorToAudioBuffer(const StreamConfig& stream_config,
+                             rtc::ArrayView<const float> source,
+                             AudioBuffer* destination);
+
+// Extracts a vector from an audiobuffer.
+void ExtractVectorFromAudioBuffer(const StreamConfig& stream_config,
+                                  AudioBuffer* source,
+                                  std::vector<float>* destination);
+
+// Sets all values in `audio_buffer` to `value`.
+void FillBuffer(float value, AudioBuffer& audio_buffer);
+
+// Sets all values in channel `channel` of `audio_buffer` to `value`.
+void FillBufferChannel(float value, int channel, AudioBuffer& audio_buffer);
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_AUDIO_BUFFER_TOOLS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.cc b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.cc
new file mode 100644
index 0000000000..6bd266dc58
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.cc
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/audio_processing_builder_for_testing.h"
+
+#include <memory>
+#include <utility>
+
+#include "modules/audio_processing/audio_processing_impl.h"
+
+namespace webrtc {
+
+AudioProcessingBuilderForTesting::AudioProcessingBuilderForTesting() = default;
+AudioProcessingBuilderForTesting::~AudioProcessingBuilderForTesting() = default;
+
+#ifdef WEBRTC_EXCLUDE_AUDIO_PROCESSING_MODULE
+
+rtc::scoped_refptr<AudioProcessing> AudioProcessingBuilderForTesting::Create() {
+  return rtc::make_ref_counted<AudioProcessingImpl>(
+      config_, std::move(capture_post_processing_),
+      std::move(render_pre_processing_), std::move(echo_control_factory_),
+      std::move(echo_detector_), std::move(capture_analyzer_));
+}
+
+#else
+
+rtc::scoped_refptr<AudioProcessing> AudioProcessingBuilderForTesting::Create() {
+  AudioProcessingBuilder builder;
+  TransferOwnershipsToBuilder(&builder);
+  return builder.SetConfig(config_).Create();
+}
+
+#endif
+
+void AudioProcessingBuilderForTesting::TransferOwnershipsToBuilder(
+    AudioProcessingBuilder* builder) {
+  builder->SetCapturePostProcessing(std::move(capture_post_processing_));
+  builder->SetRenderPreProcessing(std::move(render_pre_processing_));
+  builder->SetEchoControlFactory(std::move(echo_control_factory_));
+  builder->SetEchoDetector(std::move(echo_detector_));
+  builder->SetCaptureAnalyzer(std::move(capture_analyzer_));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.h b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.h
new file mode 100644
index 0000000000..e73706c1b6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_builder_for_testing.h
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_BUILDER_FOR_TESTING_H_
+#define MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_BUILDER_FOR_TESTING_H_
+
+#include <list>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+// Facilitates building of AudioProcessingImpl for the tests.
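+//
+// Minimal usage sketch (the config and mock component names are
+// illustrative):
+//
+//   AudioProcessingBuilderForTesting builder;
+//   builder.SetConfig(config).SetCapturePostProcessing(
+//       std::move(mock_capture_post_processing));
+//   rtc::scoped_refptr<AudioProcessing> apm = builder.Create();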
+class AudioProcessingBuilderForTesting {
+ public:
+  AudioProcessingBuilderForTesting();
+  AudioProcessingBuilderForTesting(const AudioProcessingBuilderForTesting&) =
+      delete;
+  AudioProcessingBuilderForTesting& operator=(
+      const AudioProcessingBuilderForTesting&) = delete;
+  ~AudioProcessingBuilderForTesting();
+
+  // Sets the APM configuration.
+  AudioProcessingBuilderForTesting& SetConfig(
+      const AudioProcessing::Config& config) {
+    config_ = config;
+    return *this;
+  }
+
+  // Sets the echo controller factory to inject when APM is created.
+  AudioProcessingBuilderForTesting& SetEchoControlFactory(
+      std::unique_ptr<EchoControlFactory> echo_control_factory) {
+    echo_control_factory_ = std::move(echo_control_factory);
+    return *this;
+  }
+
+  // Sets the capture post-processing sub-module to inject when APM is
+  // created.
+  AudioProcessingBuilderForTesting& SetCapturePostProcessing(
+      std::unique_ptr<CustomProcessing> capture_post_processing) {
+    capture_post_processing_ = std::move(capture_post_processing);
+    return *this;
+  }
+
+  // Sets the render pre-processing sub-module to inject when APM is created.
+  AudioProcessingBuilderForTesting& SetRenderPreProcessing(
+      std::unique_ptr<CustomProcessing> render_pre_processing) {
+    render_pre_processing_ = std::move(render_pre_processing);
+    return *this;
+  }
+
+  // Sets the echo detector to inject when APM is created.
+  AudioProcessingBuilderForTesting& SetEchoDetector(
+      rtc::scoped_refptr<EchoDetector> echo_detector) {
+    echo_detector_ = std::move(echo_detector);
+    return *this;
+  }
+
+  // Sets the capture analyzer sub-module to inject when APM is created.
+  AudioProcessingBuilderForTesting& SetCaptureAnalyzer(
+      std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
+    capture_analyzer_ = std::move(capture_analyzer);
+    return *this;
+  }
+
+  // Creates an APM instance with the specified config or the default one if
+  // unspecified. Injects the specified components transferring the ownership
+  // to the newly created APM instance - i.e., except for the config, the
+  // builder is reset to its initial state.
+  rtc::scoped_refptr<AudioProcessing> Create();
+
+ private:
+  // Transfers the ownership to a non-testing builder.
+  void TransferOwnershipsToBuilder(AudioProcessingBuilder* builder);
+
+  AudioProcessing::Config config_;
+  std::unique_ptr<EchoControlFactory> echo_control_factory_;
+  std::unique_ptr<CustomProcessing> capture_post_processing_;
+  std::unique_ptr<CustomProcessing> render_pre_processing_;
+  rtc::scoped_refptr<EchoDetector> echo_detector_;
+  std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_BUILDER_FOR_TESTING_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc
new file mode 100644
index 0000000000..7497d49fde
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.cc
@@ -0,0 +1,630 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/audio_processing_simulator.h"
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/audio/echo_canceller3_config_json.h"
+#include "api/audio/echo_canceller3_factory.h"
+#include "api/audio/echo_detector_creator.h"
+#include "modules/audio_processing/aec_dump/aec_dump_factory.h"
+#include "modules/audio_processing/echo_control_mobile_impl.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/fake_recording_device.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/strings/json.h"
+#include "rtc_base/strings/string_builder.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+// Helper for reading JSON from a file and parsing it to an AEC3
+// configuration.
+EchoCanceller3Config ReadAec3ConfigFromJsonFile(absl::string_view filename) {
+  std::string json_string;
+  std::string s;
+  std::ifstream f(std::string(filename).c_str());
+  if (f.fail()) {
+    std::cout << "Failed to open the file " << filename << std::endl;
+    RTC_CHECK_NOTREACHED();
+  }
+  while (std::getline(f, s)) {
+    json_string += s;
+  }
+
+  bool parsing_successful;
+  EchoCanceller3Config cfg;
+  Aec3ConfigFromJsonString(json_string, &cfg, &parsing_successful);
+  if (!parsing_successful) {
+    std::cout << "Parsing of json string failed: " << std::endl
+              << json_string << std::endl;
+    RTC_CHECK_NOTREACHED();
+  }
+  RTC_CHECK(EchoCanceller3Config::Validate(&cfg));
+
+  return cfg;
+}
+
+std::string GetIndexedOutputWavFilename(absl::string_view wav_name,
+                                        int counter) {
+  rtc::StringBuilder ss;
+  ss << wav_name.substr(0, wav_name.size() - 4) << "_" << counter
+     << wav_name.substr(wav_name.size() - 4);
+  return ss.Release();
+}
+
+void WriteEchoLikelihoodGraphFileHeader(std::ofstream* output_file) {
+  (*output_file) << "import numpy as np" << std::endl
+                 << "import matplotlib.pyplot as plt" << std::endl
+                 << "y = np.array([";
+}
+
+void WriteEchoLikelihoodGraphFileFooter(std::ofstream* output_file) {
+  (*output_file) << "])" << std::endl
+                 << "if __name__ == '__main__':" << std::endl
+                 << "  x = np.arange(len(y))*.01" << std::endl
+                 << "  plt.plot(x, y)" << std::endl
+                 << "  plt.ylabel('Echo likelihood')" << std::endl
+                 << "  plt.xlabel('Time (s)')" << std::endl
+                 << "  plt.show()" << std::endl;
+}
+
+// RAII class for execution time measurement. Updates the provided
+// ApiCallStatistics based on the time between ScopedTimer creation and
+// leaving the enclosing scope.
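+//
+// Typical usage, as in ProcessStream()/ProcessReverseStream() below:
+//
+//   {
+//     ScopedTimer timer(&api_call_statistics_,
+//                       ApiCallStatistics::CallType::kCapture);
+//     // ... the timed APM call ...
+//   }  // Destructor adds the elapsed nanoseconds to the statistics.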
+class ScopedTimer {
+ public:
+  ScopedTimer(ApiCallStatistics* api_call_statistics,
+              ApiCallStatistics::CallType call_type)
+      : start_time_(rtc::TimeNanos()),
+        call_type_(call_type),
+        api_call_statistics_(api_call_statistics) {}
+
+  ~ScopedTimer() {
+    api_call_statistics_->Add(rtc::TimeNanos() - start_time_, call_type_);
+  }
+
+ private:
+  const int64_t start_time_;
+  const ApiCallStatistics::CallType call_type_;
+  ApiCallStatistics* const api_call_statistics_;
+};
+
+}  // namespace
+
+SimulationSettings::SimulationSettings() = default;
+SimulationSettings::SimulationSettings(const SimulationSettings&) = default;
+SimulationSettings::~SimulationSettings() = default;
+
+AudioProcessingSimulator::AudioProcessingSimulator(
+    const SimulationSettings& settings,
+    rtc::scoped_refptr<AudioProcessing> audio_processing,
+    std::unique_ptr<AudioProcessingBuilder> ap_builder)
+    : settings_(settings),
+      ap_(std::move(audio_processing)),
+      applied_input_volume_(settings.initial_mic_level),
+      fake_recording_device_(
+          settings.initial_mic_level,
+          settings_.simulate_mic_gain ? *settings.simulated_mic_kind : 0),
+      worker_queue_("file_writer_task_queue") {
+  RTC_CHECK(!settings_.dump_internal_data || WEBRTC_APM_DEBUG_DUMP == 1);
+  if (settings_.dump_start_frame || settings_.dump_end_frame) {
+    ApmDataDumper::SetActivated(!settings_.dump_start_frame);
+  } else {
+    ApmDataDumper::SetActivated(settings_.dump_internal_data);
+  }
+
+  if (settings_.dump_set_to_use) {
+    ApmDataDumper::SetDumpSetToUse(*settings_.dump_set_to_use);
+  }
+
+  if (settings_.dump_internal_data_output_dir.has_value()) {
+    ApmDataDumper::SetOutputDirectory(
+        settings_.dump_internal_data_output_dir.value());
+  }
+
+  if (settings_.ed_graph_output_filename &&
+      !settings_.ed_graph_output_filename->empty()) {
+    residual_echo_likelihood_graph_writer_.open(
+        *settings_.ed_graph_output_filename);
+    RTC_CHECK(residual_echo_likelihood_graph_writer_.is_open());
+    WriteEchoLikelihoodGraphFileHeader(
+        &residual_echo_likelihood_graph_writer_);
+  }
+
+  if (settings_.simulate_mic_gain)
+    RTC_LOG(LS_VERBOSE) << "Simulating analog mic gain";
+
+  // Create the audio processing object.
+  RTC_CHECK(!(ap_ && ap_builder))
+      << "The AudioProcessing and the AudioProcessingBuilder cannot both be "
+         "specified at the same time.";
+
+  if (ap_) {
+    RTC_CHECK(!settings_.aec_settings_filename);
+    RTC_CHECK(!settings_.print_aec_parameter_values);
+  } else {
+    // Use the specified builder if one is provided, otherwise create a new
+    // builder.
+    std::unique_ptr<AudioProcessingBuilder> builder =
+        !!ap_builder ? std::move(ap_builder)
+                     : std::make_unique<AudioProcessingBuilder>();
+
+    // Create and set an EchoCanceller3Factory if needed.
+    const bool use_aec = settings_.use_aec && *settings_.use_aec;
+    if (use_aec) {
+      EchoCanceller3Config cfg;
+      if (settings_.aec_settings_filename) {
+        if (settings_.use_verbose_logging) {
+          std::cout << "Reading AEC Parameters from JSON input." << std::endl;
+        }
+        cfg = ReadAec3ConfigFromJsonFile(*settings_.aec_settings_filename);
+      }
+
+      if (settings_.linear_aec_output_filename) {
+        cfg.filter.export_linear_aec_output = true;
+      }
+
+      if (settings_.print_aec_parameter_values) {
+        if (!settings_.use_quiet_output) {
+          std::cout << "AEC settings:" << std::endl;
+        }
+        std::cout << Aec3ConfigToJsonString(cfg) << std::endl;
+      }
+
+      auto echo_control_factory = std::make_unique<EchoCanceller3Factory>(cfg);
+      builder->SetEchoControlFactory(std::move(echo_control_factory));
+    }
+
+    if (settings_.use_ed && *settings.use_ed) {
+      builder->SetEchoDetector(CreateEchoDetector());
+    }
+
+    // Create an audio processing object.
+    ap_ = builder->Create();
+    RTC_CHECK(ap_);
+  }
+}
+
+AudioProcessingSimulator::~AudioProcessingSimulator() {
+  if (residual_echo_likelihood_graph_writer_.is_open()) {
+    WriteEchoLikelihoodGraphFileFooter(
+        &residual_echo_likelihood_graph_writer_);
+    residual_echo_likelihood_graph_writer_.close();
+  }
+}
+
+void AudioProcessingSimulator::ProcessStream(bool fixed_interface) {
+  // Optionally simulate the input volume.
+  if (settings_.simulate_mic_gain) {
+    RTC_DCHECK(!settings_.use_analog_mic_gain_emulation);
+    // Set the input volume to simulate.
+    fake_recording_device_.SetMicLevel(applied_input_volume_);
+
+    if (settings_.aec_dump_input_filename &&
+        aec_dump_applied_input_level_.has_value()) {
+      // For AEC dumps, use the applied input level, if recorded, to
+      // "virtually restore" the capture signal level before the input volume
+      // was applied.
+      fake_recording_device_.SetUndoMicLevel(*aec_dump_applied_input_level_);
+    }
+
+    // Apply the input volume.
+    if (fixed_interface) {
+      fake_recording_device_.SimulateAnalogGain(fwd_frame_.data);
+    } else {
+      fake_recording_device_.SimulateAnalogGain(in_buf_.get());
+    }
+  }
+
+  // Let APM know which input volume was applied.
+  // Keep track of whether `set_stream_analog_level()` is called.
+  bool applied_input_volume_set = false;
+  if (settings_.simulate_mic_gain) {
+    // When the input volume is simulated, use the volume applied for
+    // simulation.
+    ap_->set_stream_analog_level(fake_recording_device_.MicLevel());
+    applied_input_volume_set = true;
+  } else if (!settings_.use_analog_mic_gain_emulation) {
+    // Ignore the recommended input volume stored in `applied_input_volume_`
+    // and instead notify APM with the recorded input volume (if available).
+    if (settings_.aec_dump_input_filename &&
+        aec_dump_applied_input_level_.has_value()) {
+      // The actually applied input volume is available in the AEC dump.
+      ap_->set_stream_analog_level(*aec_dump_applied_input_level_);
+      applied_input_volume_set = true;
+    } else if (!settings_.aec_dump_input_filename) {
+      // Wav files do not include any information about the actually applied
+      // input volume. Hence, use the recommended input volume stored in
+      // `applied_input_volume_`.
+      ap_->set_stream_analog_level(applied_input_volume_);
+      applied_input_volume_set = true;
+    }
+  }
+
+  // Post any scheduled runtime settings.
+  if (settings_.frame_for_sending_capture_output_used_false &&
+      *settings_.frame_for_sending_capture_output_used_false ==
+          static_cast<int>(num_process_stream_calls_)) {
+    ap_->PostRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+            false));
+  }
+  if (settings_.frame_for_sending_capture_output_used_true &&
+      *settings_.frame_for_sending_capture_output_used_true ==
+          static_cast<int>(num_process_stream_calls_)) {
+    ap_->PostRuntimeSetting(
+        AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(true));
+  }
+
+  // Process the current audio frame.
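+  // The fixed interface feeds APM one interleaved int16 frame, whereas the
+  // float interface passes deinterleaved per-channel float pointers; both
+  // paths share the same stream configurations.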
+ if (fixed_interface) { + { + const auto st = ScopedTimer(&api_call_statistics_, + ApiCallStatistics::CallType::kCapture); + RTC_CHECK_EQ( + AudioProcessing::kNoError, + ap_->ProcessStream(fwd_frame_.data.data(), fwd_frame_.config, + fwd_frame_.config, fwd_frame_.data.data())); + } + fwd_frame_.CopyTo(out_buf_.get()); + } else { + const auto st = ScopedTimer(&api_call_statistics_, + ApiCallStatistics::CallType::kCapture); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->ProcessStream(in_buf_->channels(), in_config_, + out_config_, out_buf_->channels())); + } + + // Retrieve the recommended input volume only if `set_stream_analog_level()` + // has been called to stick to the APM API contract. + if (applied_input_volume_set) { + applied_input_volume_ = ap_->recommended_stream_analog_level(); + } + + if (buffer_memory_writer_) { + RTC_CHECK(!buffer_file_writer_); + buffer_memory_writer_->Write(*out_buf_); + } else if (buffer_file_writer_) { + RTC_CHECK(!buffer_memory_writer_); + buffer_file_writer_->Write(*out_buf_); + } + + if (linear_aec_output_file_writer_) { + bool output_available = ap_->GetLinearAecOutput(linear_aec_output_buf_); + RTC_CHECK(output_available); + RTC_CHECK_GT(linear_aec_output_buf_.size(), 0); + RTC_CHECK_EQ(linear_aec_output_buf_[0].size(), 160); + for (size_t k = 0; k < linear_aec_output_buf_[0].size(); ++k) { + for (size_t ch = 0; ch < linear_aec_output_buf_.size(); ++ch) { + RTC_CHECK_EQ(linear_aec_output_buf_[ch].size(), 160); + float sample = FloatToFloatS16(linear_aec_output_buf_[ch][k]); + linear_aec_output_file_writer_->WriteSamples(&sample, 1); + } + } + } + + if (residual_echo_likelihood_graph_writer_.is_open()) { + auto stats = ap_->GetStatistics(); + residual_echo_likelihood_graph_writer_ + << stats.residual_echo_likelihood.value_or(-1.f) << ", "; + } + + ++num_process_stream_calls_; +} + +void AudioProcessingSimulator::ProcessReverseStream(bool fixed_interface) { + if (fixed_interface) { + { + const auto st = ScopedTimer(&api_call_statistics_, + ApiCallStatistics::CallType::kRender); + RTC_CHECK_EQ( + AudioProcessing::kNoError, + ap_->ProcessReverseStream(rev_frame_.data.data(), rev_frame_.config, + rev_frame_.config, rev_frame_.data.data())); + } + rev_frame_.CopyTo(reverse_out_buf_.get()); + } else { + const auto st = ScopedTimer(&api_call_statistics_, + ApiCallStatistics::CallType::kRender); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->ProcessReverseStream( + reverse_in_buf_->channels(), reverse_in_config_, + reverse_out_config_, reverse_out_buf_->channels())); + } + + if (reverse_buffer_file_writer_) { + reverse_buffer_file_writer_->Write(*reverse_out_buf_); + } + + ++num_reverse_process_stream_calls_; +} + +void AudioProcessingSimulator::SetupBuffersConfigsOutputs( + int input_sample_rate_hz, + int output_sample_rate_hz, + int reverse_input_sample_rate_hz, + int reverse_output_sample_rate_hz, + int input_num_channels, + int output_num_channels, + int reverse_input_num_channels, + int reverse_output_num_channels) { + in_config_ = StreamConfig(input_sample_rate_hz, input_num_channels); + in_buf_.reset(new ChannelBuffer( + rtc::CheckedDivExact(input_sample_rate_hz, kChunksPerSecond), + input_num_channels)); + + reverse_in_config_ = + StreamConfig(reverse_input_sample_rate_hz, reverse_input_num_channels); + reverse_in_buf_.reset(new ChannelBuffer( + rtc::CheckedDivExact(reverse_input_sample_rate_hz, kChunksPerSecond), + reverse_input_num_channels)); + + out_config_ = StreamConfig(output_sample_rate_hz, output_num_channels); + out_buf_.reset(new 
ChannelBuffer( + rtc::CheckedDivExact(output_sample_rate_hz, kChunksPerSecond), + output_num_channels)); + + reverse_out_config_ = + StreamConfig(reverse_output_sample_rate_hz, reverse_output_num_channels); + reverse_out_buf_.reset(new ChannelBuffer( + rtc::CheckedDivExact(reverse_output_sample_rate_hz, kChunksPerSecond), + reverse_output_num_channels)); + + fwd_frame_.SetFormat(input_sample_rate_hz, input_num_channels); + rev_frame_.SetFormat(reverse_input_sample_rate_hz, + reverse_input_num_channels); + + if (settings_.use_verbose_logging) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + + std::cout << "Sample rates:" << std::endl; + std::cout << " Forward input: " << input_sample_rate_hz << std::endl; + std::cout << " Forward output: " << output_sample_rate_hz << std::endl; + std::cout << " Reverse input: " << reverse_input_sample_rate_hz + << std::endl; + std::cout << " Reverse output: " << reverse_output_sample_rate_hz + << std::endl; + std::cout << "Number of channels: " << std::endl; + std::cout << " Forward input: " << input_num_channels << std::endl; + std::cout << " Forward output: " << output_num_channels << std::endl; + std::cout << " Reverse input: " << reverse_input_num_channels << std::endl; + std::cout << " Reverse output: " << reverse_output_num_channels + << std::endl; + } + + SetupOutput(); +} + +void AudioProcessingSimulator::SelectivelyToggleDataDumping( + int init_index, + int capture_frames_since_init) const { + if (!(settings_.dump_start_frame || settings_.dump_end_frame)) { + return; + } + + if (settings_.init_to_process && *settings_.init_to_process != init_index) { + return; + } + + if (settings_.dump_start_frame && + *settings_.dump_start_frame == capture_frames_since_init) { + ApmDataDumper::SetActivated(true); + } + + if (settings_.dump_end_frame && + *settings_.dump_end_frame == capture_frames_since_init) { + ApmDataDumper::SetActivated(false); + } +} + +void AudioProcessingSimulator::SetupOutput() { + if (settings_.output_filename) { + std::string filename; + if (settings_.store_intermediate_output) { + filename = GetIndexedOutputWavFilename(*settings_.output_filename, + output_reset_counter_); + } else { + filename = *settings_.output_filename; + } + + std::unique_ptr out_file( + new WavWriter(filename, out_config_.sample_rate_hz(), + static_cast(out_config_.num_channels()), + settings_.wav_output_format)); + buffer_file_writer_.reset(new ChannelBufferWavWriter(std::move(out_file))); + } else if (settings_.aec_dump_input_string.has_value()) { + buffer_memory_writer_ = std::make_unique( + settings_.processed_capture_samples); + } + + if (settings_.linear_aec_output_filename) { + std::string filename; + if (settings_.store_intermediate_output) { + filename = GetIndexedOutputWavFilename( + *settings_.linear_aec_output_filename, output_reset_counter_); + } else { + filename = *settings_.linear_aec_output_filename; + } + + linear_aec_output_file_writer_.reset( + new WavWriter(filename, 16000, out_config_.num_channels(), + settings_.wav_output_format)); + + linear_aec_output_buf_.resize(out_config_.num_channels()); + } + + if (settings_.reverse_output_filename) { + std::string filename; + if (settings_.store_intermediate_output) { + filename = GetIndexedOutputWavFilename(*settings_.reverse_output_filename, + output_reset_counter_); + } else { + filename = *settings_.reverse_output_filename; + } + + std::unique_ptr reverse_out_file( + new WavWriter(filename, reverse_out_config_.sample_rate_hz(), + static_cast(reverse_out_config_.num_channels()), + 
settings_.wav_output_format)); + reverse_buffer_file_writer_.reset( + new ChannelBufferWavWriter(std::move(reverse_out_file))); + } + + ++output_reset_counter_; +} + +void AudioProcessingSimulator::DetachAecDump() { + if (settings_.aec_dump_output_filename) { + ap_->DetachAecDump(); + } +} + +void AudioProcessingSimulator::ConfigureAudioProcessor() { + AudioProcessing::Config apm_config; + if (settings_.use_ts) { + apm_config.transient_suppression.enabled = *settings_.use_ts != 0; + } + if (settings_.multi_channel_render) { + apm_config.pipeline.multi_channel_render = *settings_.multi_channel_render; + } + + if (settings_.multi_channel_capture) { + apm_config.pipeline.multi_channel_capture = + *settings_.multi_channel_capture; + } + + if (settings_.use_agc2) { + apm_config.gain_controller2.enabled = *settings_.use_agc2; + if (settings_.agc2_fixed_gain_db) { + apm_config.gain_controller2.fixed_digital.gain_db = + *settings_.agc2_fixed_gain_db; + } + if (settings_.agc2_use_adaptive_gain) { + apm_config.gain_controller2.adaptive_digital.enabled = + *settings_.agc2_use_adaptive_gain; + } + } + if (settings_.use_pre_amplifier) { + apm_config.pre_amplifier.enabled = *settings_.use_pre_amplifier; + if (settings_.pre_amplifier_gain_factor) { + apm_config.pre_amplifier.fixed_gain_factor = + *settings_.pre_amplifier_gain_factor; + } + } + + if (settings_.use_analog_mic_gain_emulation) { + if (*settings_.use_analog_mic_gain_emulation) { + apm_config.capture_level_adjustment.enabled = true; + apm_config.capture_level_adjustment.analog_mic_gain_emulation.enabled = + true; + } else { + apm_config.capture_level_adjustment.analog_mic_gain_emulation.enabled = + false; + } + } + if (settings_.analog_mic_gain_emulation_initial_level) { + apm_config.capture_level_adjustment.analog_mic_gain_emulation + .initial_level = *settings_.analog_mic_gain_emulation_initial_level; + } + + if (settings_.use_capture_level_adjustment) { + apm_config.capture_level_adjustment.enabled = + *settings_.use_capture_level_adjustment; + } + if (settings_.pre_gain_factor) { + apm_config.capture_level_adjustment.pre_gain_factor = + *settings_.pre_gain_factor; + } + if (settings_.post_gain_factor) { + apm_config.capture_level_adjustment.post_gain_factor = + *settings_.post_gain_factor; + } + + const bool use_aec = settings_.use_aec && *settings_.use_aec; + const bool use_aecm = settings_.use_aecm && *settings_.use_aecm; + if (use_aec || use_aecm) { + apm_config.echo_canceller.enabled = true; + apm_config.echo_canceller.mobile_mode = use_aecm; + } + apm_config.echo_canceller.export_linear_aec_output = + !!settings_.linear_aec_output_filename; + + if (settings_.use_hpf) { + apm_config.high_pass_filter.enabled = *settings_.use_hpf; + } + + if (settings_.use_agc) { + apm_config.gain_controller1.enabled = *settings_.use_agc; + } + if (settings_.agc_mode) { + apm_config.gain_controller1.mode = + static_cast( + *settings_.agc_mode); + } + if (settings_.use_agc_limiter) { + apm_config.gain_controller1.enable_limiter = *settings_.use_agc_limiter; + } + if (settings_.agc_target_level) { + apm_config.gain_controller1.target_level_dbfs = *settings_.agc_target_level; + } + if (settings_.agc_compression_gain) { + apm_config.gain_controller1.compression_gain_db = + *settings_.agc_compression_gain; + } + if (settings_.use_analog_agc) { + apm_config.gain_controller1.analog_gain_controller.enabled = + *settings_.use_analog_agc; + } + if (settings_.analog_agc_use_digital_adaptive_controller) { + 
apm_config.gain_controller1.analog_gain_controller.enable_digital_adaptive = + *settings_.analog_agc_use_digital_adaptive_controller; + } + + if (settings_.maximum_internal_processing_rate) { + apm_config.pipeline.maximum_internal_processing_rate = + *settings_.maximum_internal_processing_rate; + } + + if (settings_.use_ns) { + apm_config.noise_suppression.enabled = *settings_.use_ns; + } + if (settings_.ns_level) { + const int level = *settings_.ns_level; + RTC_CHECK_GE(level, 0); + RTC_CHECK_LE(level, 3); + apm_config.noise_suppression.level = + static_cast(level); + } + if (settings_.ns_analysis_on_linear_aec_output) { + apm_config.noise_suppression.analyze_linear_aec_output_when_available = + *settings_.ns_analysis_on_linear_aec_output; + } + + ap_->ApplyConfig(apm_config); + + if (settings_.use_ts) { + // Default to key pressed if activating the transient suppressor with + // continuous key events. + ap_->set_stream_key_pressed(*settings_.use_ts == 2); + } + + if (settings_.aec_dump_output_filename) { + ap_->AttachAecDump(AecDumpFactory::Create( + *settings_.aec_dump_output_filename, -1, &worker_queue_)); + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h new file mode 100644 index 0000000000..e40d818bd8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/audio_processing_simulator.h @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_SIMULATOR_H_ +#define MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_SIMULATOR_H_ + +#include +#include +#include +#include +#include + +#include "absl/types/optional.h" +#include "common_audio/channel_buffer.h" +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/api_call_statistics.h" +#include "modules/audio_processing/test/fake_recording_device.h" +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/task_queue_for_test.h" +#include "rtc_base/time_utils.h" + +namespace webrtc { +namespace test { + +static const int kChunksPerSecond = 1000 / AudioProcessing::kChunkSizeMs; + +struct Int16Frame { + void SetFormat(int sample_rate_hz, int num_channels) { + this->sample_rate_hz = sample_rate_hz; + samples_per_channel = + rtc::CheckedDivExact(sample_rate_hz, kChunksPerSecond); + this->num_channels = num_channels; + config = StreamConfig(sample_rate_hz, num_channels); + data.resize(num_channels * samples_per_channel); + } + + void CopyTo(ChannelBuffer* dest) { + RTC_DCHECK(dest); + RTC_CHECK_EQ(num_channels, dest->num_channels()); + RTC_CHECK_EQ(samples_per_channel, dest->num_frames()); + // Copy the data from the input buffer. 
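+    // The interleaved int16_t samples are first converted to normalized
+    // floats and then deinterleaved into the per-channel destination buffer.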
+ std::vector tmp(samples_per_channel * num_channels); + S16ToFloat(data.data(), tmp.size(), tmp.data()); + Deinterleave(tmp.data(), samples_per_channel, num_channels, + dest->channels()); + } + + void CopyFrom(const ChannelBuffer& src) { + RTC_CHECK_EQ(src.num_channels(), num_channels); + RTC_CHECK_EQ(src.num_frames(), samples_per_channel); + data.resize(num_channels * samples_per_channel); + int16_t* dest_data = data.data(); + for (int ch = 0; ch < num_channels; ++ch) { + for (int sample = 0; sample < samples_per_channel; ++sample) { + dest_data[sample * num_channels + ch] = + src.channels()[ch][sample] * 32767; + } + } + } + + int sample_rate_hz; + int samples_per_channel; + int num_channels; + + StreamConfig config; + + std::vector data; +}; + +// Holds all the parameters available for controlling the simulation. +struct SimulationSettings { + SimulationSettings(); + SimulationSettings(const SimulationSettings&); + ~SimulationSettings(); + absl::optional stream_delay; + absl::optional use_stream_delay; + absl::optional output_sample_rate_hz; + absl::optional output_num_channels; + absl::optional reverse_output_sample_rate_hz; + absl::optional reverse_output_num_channels; + absl::optional output_filename; + absl::optional reverse_output_filename; + absl::optional input_filename; + absl::optional reverse_input_filename; + absl::optional artificial_nearend_filename; + absl::optional linear_aec_output_filename; + absl::optional use_aec; + absl::optional use_aecm; + absl::optional use_ed; // Residual Echo Detector. + absl::optional ed_graph_output_filename; + absl::optional use_agc; + absl::optional use_agc2; + absl::optional use_pre_amplifier; + absl::optional use_capture_level_adjustment; + absl::optional use_analog_mic_gain_emulation; + absl::optional use_hpf; + absl::optional use_ns; + absl::optional use_ts; + absl::optional use_analog_agc; + absl::optional use_all; + absl::optional analog_agc_use_digital_adaptive_controller; + absl::optional agc_mode; + absl::optional agc_target_level; + absl::optional use_agc_limiter; + absl::optional agc_compression_gain; + absl::optional agc2_use_adaptive_gain; + absl::optional agc2_fixed_gain_db; + absl::optional pre_amplifier_gain_factor; + absl::optional pre_gain_factor; + absl::optional post_gain_factor; + absl::optional analog_mic_gain_emulation_initial_level; + absl::optional ns_level; + absl::optional ns_analysis_on_linear_aec_output; + absl::optional override_key_pressed; + absl::optional maximum_internal_processing_rate; + int initial_mic_level; + bool simulate_mic_gain = false; + absl::optional multi_channel_render; + absl::optional multi_channel_capture; + absl::optional simulated_mic_kind; + absl::optional frame_for_sending_capture_output_used_false; + absl::optional frame_for_sending_capture_output_used_true; + bool report_performance = false; + absl::optional performance_report_output_filename; + bool report_bitexactness = false; + bool use_verbose_logging = false; + bool use_quiet_output = false; + bool discard_all_settings_in_aecdump = true; + absl::optional aec_dump_input_filename; + absl::optional aec_dump_output_filename; + bool fixed_interface = false; + bool store_intermediate_output = false; + bool print_aec_parameter_values = false; + bool dump_internal_data = false; + WavFile::SampleFormat wav_output_format = WavFile::SampleFormat::kInt16; + absl::optional dump_internal_data_output_dir; + absl::optional dump_set_to_use; + absl::optional call_order_input_filename; + absl::optional call_order_output_filename; + absl::optional 
aec_settings_filename; + absl::optional aec_dump_input_string; + std::vector* processed_capture_samples = nullptr; + bool analysis_only = false; + absl::optional dump_start_frame; + absl::optional dump_end_frame; + absl::optional init_to_process; +}; + +// Provides common functionality for performing audioprocessing simulations. +class AudioProcessingSimulator { + public: + AudioProcessingSimulator(const SimulationSettings& settings, + rtc::scoped_refptr audio_processing, + std::unique_ptr ap_builder); + + AudioProcessingSimulator() = delete; + AudioProcessingSimulator(const AudioProcessingSimulator&) = delete; + AudioProcessingSimulator& operator=(const AudioProcessingSimulator&) = delete; + + virtual ~AudioProcessingSimulator(); + + // Processes the data in the input. + virtual void Process() = 0; + + // Returns the execution times of all AudioProcessing calls. + const ApiCallStatistics& GetApiCallStatistics() const { + return api_call_statistics_; + } + + // Analyzes the data in the input and reports the resulting statistics. + virtual void Analyze() = 0; + + // Reports whether the processed recording was bitexact. + bool OutputWasBitexact() { return bitexact_output_; } + + size_t get_num_process_stream_calls() { return num_process_stream_calls_; } + size_t get_num_reverse_process_stream_calls() { + return num_reverse_process_stream_calls_; + } + + protected: + void ProcessStream(bool fixed_interface); + void ProcessReverseStream(bool fixed_interface); + void ConfigureAudioProcessor(); + void DetachAecDump(); + void SetupBuffersConfigsOutputs(int input_sample_rate_hz, + int output_sample_rate_hz, + int reverse_input_sample_rate_hz, + int reverse_output_sample_rate_hz, + int input_num_channels, + int output_num_channels, + int reverse_input_num_channels, + int reverse_output_num_channels); + void SelectivelyToggleDataDumping(int init_index, + int capture_frames_since_init) const; + + const SimulationSettings settings_; + rtc::scoped_refptr ap_; + + std::unique_ptr> in_buf_; + std::unique_ptr> out_buf_; + std::unique_ptr> reverse_in_buf_; + std::unique_ptr> reverse_out_buf_; + std::vector> linear_aec_output_buf_; + StreamConfig in_config_; + StreamConfig out_config_; + StreamConfig reverse_in_config_; + StreamConfig reverse_out_config_; + std::unique_ptr buffer_reader_; + std::unique_ptr reverse_buffer_reader_; + Int16Frame rev_frame_; + Int16Frame fwd_frame_; + bool bitexact_output_ = true; + absl::optional aec_dump_applied_input_level_ = 0; + + protected: + size_t output_reset_counter_ = 0; + + private: + void SetupOutput(); + + size_t num_process_stream_calls_ = 0; + size_t num_reverse_process_stream_calls_ = 0; + std::unique_ptr buffer_file_writer_; + std::unique_ptr reverse_buffer_file_writer_; + std::unique_ptr buffer_memory_writer_; + std::unique_ptr linear_aec_output_file_writer_; + ApiCallStatistics api_call_statistics_; + std::ofstream residual_echo_likelihood_graph_writer_; + int applied_input_volume_; + FakeRecordingDevice fake_recording_device_; + + TaskQueueForTest worker_queue_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_SIMULATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc new file mode 100644 index 0000000000..c23ec74366 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.cc @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2014 The WebRTC project 
authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/audioproc_float_impl.h" + +#include + +#include +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/strings/string_view.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/aec_dump_based_simulator.h" +#include "modules/audio_processing/test/audio_processing_simulator.h" +#include "modules/audio_processing/test/wav_based_simulator.h" +#include "rtc_base/checks.h" +#include "rtc_base/strings/string_builder.h" +#include "system_wrappers/include/field_trial.h" + +constexpr int kParameterNotSpecifiedValue = -10000; + +ABSL_FLAG(std::string, dump_input, "", "Aec dump input filename"); +ABSL_FLAG(std::string, dump_output, "", "Aec dump output filename"); +ABSL_FLAG(std::string, i, "", "Forward stream input wav filename"); +ABSL_FLAG(std::string, o, "", "Forward stream output wav filename"); +ABSL_FLAG(std::string, ri, "", "Reverse stream input wav filename"); +ABSL_FLAG(std::string, ro, "", "Reverse stream output wav filename"); +ABSL_FLAG(std::string, + artificial_nearend, + "", + "Artificial nearend wav filename"); +ABSL_FLAG(std::string, linear_aec_output, "", "Linear AEC output wav filename"); +ABSL_FLAG(int, + output_num_channels, + kParameterNotSpecifiedValue, + "Number of forward stream output channels"); +ABSL_FLAG(int, + reverse_output_num_channels, + kParameterNotSpecifiedValue, + "Number of Reverse stream output channels"); +ABSL_FLAG(int, + output_sample_rate_hz, + kParameterNotSpecifiedValue, + "Forward stream output sample rate in Hz"); +ABSL_FLAG(int, + reverse_output_sample_rate_hz, + kParameterNotSpecifiedValue, + "Reverse stream output sample rate in Hz"); +ABSL_FLAG(bool, + fixed_interface, + false, + "Use the fixed interface when operating on wav files"); +ABSL_FLAG(int, + aec, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the echo canceller"); +ABSL_FLAG(int, + aecm, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the mobile echo controller"); +ABSL_FLAG(int, + ed, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the residual echo detector"); +ABSL_FLAG(std::string, + ed_graph, + "", + "Output filename for graph of echo likelihood"); +ABSL_FLAG(int, + agc, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the AGC"); +ABSL_FLAG(int, + agc2, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the AGC2"); +ABSL_FLAG(int, + pre_amplifier, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the pre amplifier"); +ABSL_FLAG( + int, + capture_level_adjustment, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the capture level adjustment functionality"); +ABSL_FLAG(int, + analog_mic_gain_emulation, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the analog mic gain emulation in the " + "production (non-test) code."); +ABSL_FLAG(int, + hpf, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the high-pass filter"); +ABSL_FLAG(int, + ns, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the noise suppressor"); 
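+// Note: most of the activate/deactivate flags in this file are tri-state
+// integers: leaving a flag at kParameterNotSpecifiedValue keeps the
+// corresponding optional setting unset, while 0 and 1 explicitly deactivate
+// or activate the feature (see SetSettingIfFlagSet() further below).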
+ABSL_FLAG(int, + ts, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the transient suppressor"); +ABSL_FLAG(int, + analog_agc, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the analog AGC"); +ABSL_FLAG(bool, + all_default, + false, + "Activate all of the default components (will be overridden by any " + "other settings)"); +ABSL_FLAG(int, + analog_agc_use_digital_adaptive_controller, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) digital adaptation in AGC1. " + "Digital adaptation is active by default."); +ABSL_FLAG(int, + agc_mode, + kParameterNotSpecifiedValue, + "Specify the AGC mode (0-2)"); +ABSL_FLAG(int, + agc_target_level, + kParameterNotSpecifiedValue, + "Specify the AGC target level (0-31)"); +ABSL_FLAG(int, + agc_limiter, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the level estimator"); +ABSL_FLAG(int, + agc_compression_gain, + kParameterNotSpecifiedValue, + "Specify the AGC compression gain (0-90)"); +ABSL_FLAG(int, + agc2_enable_adaptive_gain, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the AGC2 adaptive gain"); +ABSL_FLAG(float, + agc2_fixed_gain_db, + kParameterNotSpecifiedValue, + "AGC2 fixed gain (dB) to apply"); +ABSL_FLAG(float, + pre_amplifier_gain_factor, + kParameterNotSpecifiedValue, + "Pre-amplifier gain factor (linear) to apply"); +ABSL_FLAG(float, + pre_gain_factor, + kParameterNotSpecifiedValue, + "Pre-gain factor (linear) to apply in the capture level adjustment"); +ABSL_FLAG(float, + post_gain_factor, + kParameterNotSpecifiedValue, + "Post-gain factor (linear) to apply in the capture level adjustment"); +ABSL_FLAG(float, + analog_mic_gain_emulation_initial_level, + kParameterNotSpecifiedValue, + "Emulated analog mic level to apply initially in the production " + "(non-test) code."); +ABSL_FLAG(int, + ns_level, + kParameterNotSpecifiedValue, + "Specify the NS level (0-3)"); +ABSL_FLAG(int, + ns_analysis_on_linear_aec_output, + kParameterNotSpecifiedValue, + "Specifies whether the noise suppression analysis is done on the " + "linear AEC output"); +ABSL_FLAG(int, + maximum_internal_processing_rate, + kParameterNotSpecifiedValue, + "Set a maximum internal processing rate (32000 or 48000) to override " + "the default rate"); +ABSL_FLAG(int, + stream_delay, + kParameterNotSpecifiedValue, + "Specify the stream delay in ms to use"); +ABSL_FLAG(int, + use_stream_delay, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) reporting the stream delay"); +ABSL_FLAG(int, + stream_drift_samples, + kParameterNotSpecifiedValue, + "Specify the number of stream drift samples to use"); +ABSL_FLAG(int, + initial_mic_level, + 100, + "Initial mic level (0-255) for the analog mic gain simulation in the " + "test code"); +ABSL_FLAG(int, + simulate_mic_gain, + 0, + "Activate (1) or deactivate(0) the analog mic gain simulation in the " + "test code"); +ABSL_FLAG(int, + multi_channel_render, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) multi-channel render processing in " + "APM pipeline"); +ABSL_FLAG(int, + multi_channel_capture, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) multi-channel capture processing in " + "APM pipeline"); +ABSL_FLAG(int, + simulated_mic_kind, + kParameterNotSpecifiedValue, + "Specify which microphone kind to use for microphone simulation"); +ABSL_FLAG(int, + override_key_pressed, + kParameterNotSpecifiedValue, + "Always set to true (1) or to false (0) the key press state. 
If " + "unspecified, false is set with Wav files or, with AEC dumps, the " + "recorded event is used."); +ABSL_FLAG(int, + frame_for_sending_capture_output_used_false, + kParameterNotSpecifiedValue, + "Capture frame index for sending a runtime setting for that the " + "capture output is not used."); +ABSL_FLAG(int, + frame_for_sending_capture_output_used_true, + kParameterNotSpecifiedValue, + "Capture frame index for sending a runtime setting for that the " + "capture output is used."); +ABSL_FLAG(bool, performance_report, false, "Report the APM performance "); +ABSL_FLAG(std::string, + performance_report_output_file, + "", + "Generate a CSV file with the API call durations"); +ABSL_FLAG(bool, verbose, false, "Produce verbose output"); +ABSL_FLAG(bool, + quiet, + false, + "Avoid producing information about the progress."); +ABSL_FLAG(bool, + bitexactness_report, + false, + "Report bitexactness for aec dump result reproduction"); +ABSL_FLAG(bool, + discard_settings_in_aecdump, + false, + "Discard any config settings specified in the aec dump"); +ABSL_FLAG(bool, + store_intermediate_output, + false, + "Creates new output files after each init"); +ABSL_FLAG(std::string, + custom_call_order_file, + "", + "Custom process API call order file"); +ABSL_FLAG(std::string, + output_custom_call_order_file, + "", + "Generate custom process API call order file from AEC dump"); +ABSL_FLAG(bool, + print_aec_parameter_values, + false, + "Print parameter values used in AEC in JSON-format"); +ABSL_FLAG(std::string, + aec_settings, + "", + "File in JSON-format with custom AEC settings"); +ABSL_FLAG(bool, + dump_data, + false, + "Dump internal data during the call (requires build flag)"); +ABSL_FLAG(std::string, + dump_data_output_dir, + "", + "Internal data dump output directory"); +ABSL_FLAG(int, + dump_set_to_use, + kParameterNotSpecifiedValue, + "Specifies the dump set to use (if not all the dump sets will " + "be used"); +ABSL_FLAG(bool, + analyze, + false, + "Only analyze the call setup behavior (no processing)"); +ABSL_FLAG(float, + dump_start_seconds, + kParameterNotSpecifiedValue, + "Start of when to dump data (seconds)."); +ABSL_FLAG(float, + dump_end_seconds, + kParameterNotSpecifiedValue, + "End of when to dump data (seconds)."); +ABSL_FLAG(int, + dump_start_frame, + kParameterNotSpecifiedValue, + "Start of when to dump data (frames)."); +ABSL_FLAG(int, + dump_end_frame, + kParameterNotSpecifiedValue, + "End of when to dump data (frames)."); +ABSL_FLAG(int, + init_to_process, + kParameterNotSpecifiedValue, + "Init index to process."); + +ABSL_FLAG(bool, + float_wav_output, + false, + "Produce floating point wav output files."); + +ABSL_FLAG(std::string, + force_fieldtrials, + "", + "Field trials control experimental feature code which can be forced. " + "E.g. 
running with --force_fieldtrials=WebRTC-FooFeature/Enable/"
+          " will assign the group Enable to field trial WebRTC-FooFeature.");
+
+namespace webrtc {
+namespace test {
+namespace {
+
+const char kUsageDescription[] =
+    "Usage: audioproc_f [options] -i <input.wav>\n"
+    "  or\n"
+    "  audioproc_f [options] -dump_input <aec_dump>\n"
+    "\n\n"
+    "Command-line tool to simulate a call using the audio "
+    "processing module, either based on wav files or "
+    "protobuf debug dump recordings.\n";
+
+void SetSettingIfSpecified(absl::string_view value,
+                           absl::optional<std::string>* parameter) {
+  if (value.compare("") != 0) {
+    *parameter = std::string(value);
+  }
+}
+
+void SetSettingIfSpecified(int value, absl::optional<int>* parameter) {
+  if (value != kParameterNotSpecifiedValue) {
+    *parameter = value;
+  }
+}
+
+void SetSettingIfSpecified(float value, absl::optional<float>* parameter) {
+  constexpr float kFloatParameterNotSpecifiedValue =
+      kParameterNotSpecifiedValue;
+  if (value != kFloatParameterNotSpecifiedValue) {
+    *parameter = value;
+  }
+}
+
+void SetSettingIfFlagSet(int32_t flag, absl::optional<bool>* parameter) {
+  if (flag == 0) {
+    *parameter = false;
+  } else if (flag == 1) {
+    *parameter = true;
+  }
+}
+
+SimulationSettings CreateSettings() {
+  SimulationSettings settings;
+  if (absl::GetFlag(FLAGS_all_default)) {
+    settings.use_ts = true;
+    settings.use_analog_agc = true;
+    settings.use_ns = true;
+    settings.use_hpf = true;
+    settings.use_agc = true;
+    settings.use_agc2 = false;
+    settings.use_pre_amplifier = false;
+    settings.use_aec = true;
+    settings.use_aecm = false;
+    settings.use_ed = false;
+  }
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_input),
+                        &settings.aec_dump_input_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_output),
+                        &settings.aec_dump_output_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_i), &settings.input_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_o), &settings.output_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_ri),
+                        &settings.reverse_input_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_ro),
+                        &settings.reverse_output_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_artificial_nearend),
+                        &settings.artificial_nearend_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_linear_aec_output),
+                        &settings.linear_aec_output_filename);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_output_num_channels),
+                        &settings.output_num_channels);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_reverse_output_num_channels),
+                        &settings.reverse_output_num_channels);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_output_sample_rate_hz),
+                        &settings.output_sample_rate_hz);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_reverse_output_sample_rate_hz),
+                        &settings.reverse_output_sample_rate_hz);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_aec), &settings.use_aec);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_aecm), &settings.use_aecm);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_ed), &settings.use_ed);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_ed_graph),
+                        &settings.ed_graph_output_filename);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_agc), &settings.use_agc);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_agc2), &settings.use_agc2);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_pre_amplifier),
+                      &settings.use_pre_amplifier);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_capture_level_adjustment),
+                      &settings.use_capture_level_adjustment);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_mic_gain_emulation),
+                      &settings.use_analog_mic_gain_emulation);
+  SetSettingIfFlagSet(absl::GetFlag(FLAGS_hpf),
&settings.use_hpf); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_ns), &settings.use_ns); + SetSettingIfSpecified(absl::GetFlag(FLAGS_ts), &settings.use_ts); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc), + &settings.use_analog_agc); + SetSettingIfFlagSet( + absl::GetFlag(FLAGS_analog_agc_use_digital_adaptive_controller), + &settings.analog_agc_use_digital_adaptive_controller); + SetSettingIfSpecified(absl::GetFlag(FLAGS_agc_mode), &settings.agc_mode); + SetSettingIfSpecified(absl::GetFlag(FLAGS_agc_target_level), + &settings.agc_target_level); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_agc_limiter), + &settings.use_agc_limiter); + SetSettingIfSpecified(absl::GetFlag(FLAGS_agc_compression_gain), + &settings.agc_compression_gain); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_agc2_enable_adaptive_gain), + &settings.agc2_use_adaptive_gain); + + SetSettingIfSpecified(absl::GetFlag(FLAGS_agc2_fixed_gain_db), + &settings.agc2_fixed_gain_db); + SetSettingIfSpecified(absl::GetFlag(FLAGS_pre_amplifier_gain_factor), + &settings.pre_amplifier_gain_factor); + SetSettingIfSpecified(absl::GetFlag(FLAGS_pre_gain_factor), + &settings.pre_gain_factor); + SetSettingIfSpecified(absl::GetFlag(FLAGS_post_gain_factor), + &settings.post_gain_factor); + SetSettingIfSpecified( + absl::GetFlag(FLAGS_analog_mic_gain_emulation_initial_level), + &settings.analog_mic_gain_emulation_initial_level); + SetSettingIfSpecified(absl::GetFlag(FLAGS_ns_level), &settings.ns_level); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_ns_analysis_on_linear_aec_output), + &settings.ns_analysis_on_linear_aec_output); + SetSettingIfSpecified(absl::GetFlag(FLAGS_maximum_internal_processing_rate), + &settings.maximum_internal_processing_rate); + SetSettingIfSpecified(absl::GetFlag(FLAGS_stream_delay), + &settings.stream_delay); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_use_stream_delay), + &settings.use_stream_delay); + SetSettingIfSpecified(absl::GetFlag(FLAGS_custom_call_order_file), + &settings.call_order_input_filename); + SetSettingIfSpecified(absl::GetFlag(FLAGS_output_custom_call_order_file), + &settings.call_order_output_filename); + SetSettingIfSpecified(absl::GetFlag(FLAGS_aec_settings), + &settings.aec_settings_filename); + settings.initial_mic_level = absl::GetFlag(FLAGS_initial_mic_level); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_multi_channel_render), + &settings.multi_channel_render); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_multi_channel_capture), + &settings.multi_channel_capture); + settings.simulate_mic_gain = absl::GetFlag(FLAGS_simulate_mic_gain); + SetSettingIfSpecified(absl::GetFlag(FLAGS_simulated_mic_kind), + &settings.simulated_mic_kind); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_override_key_pressed), + &settings.override_key_pressed); + SetSettingIfSpecified( + absl::GetFlag(FLAGS_frame_for_sending_capture_output_used_false), + &settings.frame_for_sending_capture_output_used_false); + SetSettingIfSpecified( + absl::GetFlag(FLAGS_frame_for_sending_capture_output_used_true), + &settings.frame_for_sending_capture_output_used_true); + settings.report_performance = absl::GetFlag(FLAGS_performance_report); + SetSettingIfSpecified(absl::GetFlag(FLAGS_performance_report_output_file), + &settings.performance_report_output_filename); + settings.use_verbose_logging = absl::GetFlag(FLAGS_verbose); + settings.use_quiet_output = absl::GetFlag(FLAGS_quiet); + settings.report_bitexactness = absl::GetFlag(FLAGS_bitexactness_report); + settings.discard_all_settings_in_aecdump = + absl::GetFlag(FLAGS_discard_settings_in_aecdump); + 
settings.fixed_interface = absl::GetFlag(FLAGS_fixed_interface);
+  settings.store_intermediate_output =
+      absl::GetFlag(FLAGS_store_intermediate_output);
+  settings.print_aec_parameter_values =
+      absl::GetFlag(FLAGS_print_aec_parameter_values);
+  settings.dump_internal_data = absl::GetFlag(FLAGS_dump_data);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_data_output_dir),
+                        &settings.dump_internal_data_output_dir);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_set_to_use),
+                        &settings.dump_set_to_use);
+  settings.wav_output_format = absl::GetFlag(FLAGS_float_wav_output)
+                                   ? WavFile::SampleFormat::kFloat
+                                   : WavFile::SampleFormat::kInt16;
+
+  settings.analysis_only = absl::GetFlag(FLAGS_analyze);
+
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_start_frame),
+                        &settings.dump_start_frame);
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_end_frame),
+                        &settings.dump_end_frame);
+
+  constexpr int kFramesPerSecond = 100;
+  absl::optional<float> start_seconds;
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_start_seconds),
+                        &start_seconds);
+  if (start_seconds) {
+    settings.dump_start_frame = *start_seconds * kFramesPerSecond;
+  }
+
+  absl::optional<float> end_seconds;
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_dump_end_seconds), &end_seconds);
+  if (end_seconds) {
+    settings.dump_end_frame = *end_seconds * kFramesPerSecond;
+  }
+
+  SetSettingIfSpecified(absl::GetFlag(FLAGS_init_to_process),
+                        &settings.init_to_process);
+
+  return settings;
+}
+
+void ReportConditionalErrorAndExit(bool condition, absl::string_view message) {
+  if (condition) {
+    std::cerr << message << std::endl;
+    exit(1);
+  }
+}
+
+void PerformBasicParameterSanityChecks(
+    const SimulationSettings& settings,
+    bool pre_constructed_ap_provided,
+    bool pre_constructed_ap_builder_provided) {
+  if (settings.input_filename || settings.reverse_input_filename) {
+    ReportConditionalErrorAndExit(
+        !!settings.aec_dump_input_filename,
+        "Error: The aec dump file cannot be specified "
+        "together with input wav files!\n");
+
+    ReportConditionalErrorAndExit(
+        !!settings.aec_dump_input_string,
+        "Error: The aec dump input string cannot be specified "
+        "together with input wav files!\n");
+
+    ReportConditionalErrorAndExit(!!settings.artificial_nearend_filename,
+                                  "Error: The artificial nearend cannot be "
+                                  "specified together with input wav files!\n");
+
+    ReportConditionalErrorAndExit(!settings.input_filename,
+                                  "Error: When operating on wav files, the "
+                                  "input wav filename must be "
+                                  "specified!\n");
+
+    ReportConditionalErrorAndExit(
+        settings.reverse_output_filename && !settings.reverse_input_filename,
+        "Error: When operating on wav files, the reverse input wav filename "
+        "must be specified if the reverse output wav filename is specified!\n");
+  } else {
+    ReportConditionalErrorAndExit(
+        !settings.aec_dump_input_filename && !settings.aec_dump_input_string,
+        "Error: Either the aec dump input file, the wav "
+        "input file or the aec dump input string must be specified!\n");
+    ReportConditionalErrorAndExit(
+        settings.aec_dump_input_filename && settings.aec_dump_input_string,
+        "Error: The aec dump input file cannot be specified together with the "
+        "aec dump input string!\n");
+  }
+
+  ReportConditionalErrorAndExit(settings.use_aec && !(*settings.use_aec) &&
+                                    settings.linear_aec_output_filename,
+                                "Error: The linear AEC output filename cannot "
+                                "be specified without the AEC being active");
+
+  ReportConditionalErrorAndExit(
+      settings.use_aec && *settings.use_aec && settings.use_aecm &&
+          *settings.use_aecm,
+      "Error: The AEC and the AECM cannot be activated at the same time!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.output_sample_rate_hz && *settings.output_sample_rate_hz <= 0,
+      "Error: --output_sample_rate_hz must be positive!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.reverse_output_sample_rate_hz &&
+          *settings.reverse_output_sample_rate_hz <= 0,
+      "Error: --reverse_output_sample_rate_hz must be positive!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.output_num_channels && *settings.output_num_channels <= 0,
+      "Error: --output_num_channels must be positive!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.reverse_output_num_channels &&
+          *settings.reverse_output_num_channels <= 0,
+      "Error: --reverse_output_num_channels must be positive!\n");
+
+  ReportConditionalErrorAndExit(
+      settings.agc_target_level && ((*settings.agc_target_level) < 0 ||
+                                    (*settings.agc_target_level) > 31),
+      "Error: --agc_target_level must be specified between 0 and 31.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.agc_compression_gain && ((*settings.agc_compression_gain) < 0 ||
+                                        (*settings.agc_compression_gain) > 90),
+      "Error: --agc_compression_gain must be specified between 0 and 90.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.agc2_fixed_gain_db && ((*settings.agc2_fixed_gain_db) < 0 ||
+                                      (*settings.agc2_fixed_gain_db) > 90),
+      "Error: --agc2_fixed_gain_db must be specified between 0 and 90.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.ns_level &&
+          ((*settings.ns_level) < 0 || (*settings.ns_level) > 3),
+      "Error: --ns_level must be specified between 0 and 3.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.report_bitexactness && !settings.aec_dump_input_filename,
+      "Error: --bitexactness_report can only be used when operating on an "
+      "aecdump\n");
+
+  ReportConditionalErrorAndExit(
+      settings.call_order_input_filename && settings.aec_dump_input_filename,
+      "Error: --custom_call_order_file cannot be used when operating on an "
+      "aecdump\n");
+
+  ReportConditionalErrorAndExit(
+      (settings.initial_mic_level < 0 || settings.initial_mic_level > 255),
+      "Error: --initial_mic_level must be specified between 0 and 255.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.simulated_mic_kind && !settings.simulate_mic_gain,
+      "Error: --simulated_mic_kind cannot be specified when mic simulation is "
+      "disabled\n");
+
+  ReportConditionalErrorAndExit(
+      !settings.simulated_mic_kind && settings.simulate_mic_gain,
+      "Error: --simulated_mic_kind must be specified when mic simulation is "
+      "enabled\n");
+
+  // TODO(bugs.webrtc.org/7494): Document how the two settings below differ.
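+  // Per the flag descriptions above, --simulate_mic_gain applies the gain in
+  // the test code (via the fake recording device), whereas
+  // --analog_mic_gain_emulation enables the emulation in the production APM
+  // code; the two are therefore mutually exclusive.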
+  ReportConditionalErrorAndExit(
+      settings.simulate_mic_gain && settings.use_analog_mic_gain_emulation,
+      "Error: --simulate_mic_gain and --use_analog_mic_gain_emulation cannot "
+      "be enabled at the same time\n");
+
+  auto valid_wav_name = [](absl::string_view wav_file_name) {
+    if (wav_file_name.size() < 5) {
+      return false;
+    }
+    if ((wav_file_name.compare(wav_file_name.size() - 4, 4, ".wav") == 0) ||
+        (wav_file_name.compare(wav_file_name.size() - 4, 4, ".WAV") == 0)) {
+      return true;
+    }
+    return false;
+  };
+
+  ReportConditionalErrorAndExit(
+      settings.input_filename && (!valid_wav_name(*settings.input_filename)),
+      "Error: --i must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.output_filename && (!valid_wav_name(*settings.output_filename)),
+      "Error: --o must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.reverse_input_filename &&
+          (!valid_wav_name(*settings.reverse_input_filename)),
+      "Error: --ri must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.reverse_output_filename &&
+          (!valid_wav_name(*settings.reverse_output_filename)),
+      "Error: --ro must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.artificial_nearend_filename &&
+          !valid_wav_name(*settings.artificial_nearend_filename),
+      "Error: --artificial_nearend must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.linear_aec_output_filename &&
+          (!valid_wav_name(*settings.linear_aec_output_filename)),
+      "Error: --linear_aec_output must be a valid .wav file name.\n");
+
+  ReportConditionalErrorAndExit(
+      WEBRTC_APM_DEBUG_DUMP == 0 && settings.dump_internal_data,
+      "Error: --dump_data cannot be set without proper build support.\n");
+
+  ReportConditionalErrorAndExit(settings.init_to_process &&
+                                    *settings.init_to_process != 1 &&
+                                    !settings.aec_dump_input_filename,
+                                "Error: --init_to_process must be set to 1 for "
+                                "wav-file based simulations.\n");
+
+  ReportConditionalErrorAndExit(
+      !settings.init_to_process &&
+          (settings.dump_start_frame || settings.dump_end_frame),
+      "Error: --init_to_process must be set when specifying a start and/or end "
+      "frame for when to dump internal data.\n");
+
+  ReportConditionalErrorAndExit(
+      !settings.dump_internal_data &&
+          settings.dump_internal_data_output_dir.has_value(),
+      "Error: --dump_data_output_dir cannot be set without --dump_data.\n");
+
+  ReportConditionalErrorAndExit(
+      !settings.aec_dump_input_filename &&
+          settings.call_order_output_filename.has_value(),
+      "Error: --output_custom_call_order_file needs an AEC dump input file.\n");
+
+  ReportConditionalErrorAndExit(
+      (!settings.use_pre_amplifier || !(*settings.use_pre_amplifier)) &&
+          settings.pre_amplifier_gain_factor.has_value(),
+      "Error: --pre_amplifier_gain_factor needs --pre_amplifier to be "
+      "specified and set.\n");
+
+  ReportConditionalErrorAndExit(
+      pre_constructed_ap_provided && pre_constructed_ap_builder_provided,
+      "Error: The AudioProcessing and the AudioProcessingBuilder cannot both "
+      "be specified at the same time.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.aec_settings_filename && pre_constructed_ap_provided,
+      "Error: The aec_settings_filename cannot be specified when a "
+      "pre-constructed audio processing object is provided.\n");
+
+  ReportConditionalErrorAndExit(
+      settings.print_aec_parameter_values && pre_constructed_ap_provided,
+      "Error: The print_aec_parameter_values cannot be set when a "
+      "pre-constructed audio processing object is 
provided.\n"); + + if (settings.linear_aec_output_filename && pre_constructed_ap_provided) { + std::cout << "Warning: For the linear AEC output to be stored, this must " + "be configured in the AEC that is part of the provided " + "AudioProcessing object." + << std::endl; + } +} + +int RunSimulation(rtc::scoped_refptr audio_processing, + std::unique_ptr ap_builder, + int argc, + char* argv[], + absl::string_view input_aecdump, + std::vector* processed_capture_samples) { + std::vector args = absl::ParseCommandLine(argc, argv); + if (args.size() != 1) { + printf("%s", kUsageDescription); + return 1; + } + // InitFieldTrialsFromString stores the char*, so the char array must + // outlive the application. + const std::string field_trials = absl::GetFlag(FLAGS_force_fieldtrials); + webrtc::field_trial::InitFieldTrialsFromString(field_trials.c_str()); + + SimulationSettings settings = CreateSettings(); + if (!input_aecdump.empty()) { + settings.aec_dump_input_string = input_aecdump; + settings.processed_capture_samples = processed_capture_samples; + RTC_CHECK(settings.processed_capture_samples); + } + PerformBasicParameterSanityChecks(settings, !!audio_processing, !!ap_builder); + std::unique_ptr processor; + + if (settings.aec_dump_input_filename || settings.aec_dump_input_string) { + processor.reset(new AecDumpBasedSimulator( + settings, std::move(audio_processing), std::move(ap_builder))); + } else { + processor.reset(new WavBasedSimulator(settings, std::move(audio_processing), + std::move(ap_builder))); + } + + if (settings.analysis_only) { + processor->Analyze(); + } else { + processor->Process(); + } + + if (settings.report_performance) { + processor->GetApiCallStatistics().PrintReport(); + } + if (settings.performance_report_output_filename) { + processor->GetApiCallStatistics().WriteReportToFile( + *settings.performance_report_output_filename); + } + + if (settings.report_bitexactness && settings.aec_dump_input_filename) { + if (processor->OutputWasBitexact()) { + std::cout << "The processing was bitexact."; + } else { + std::cout << "The processing was not bitexact."; + } + } + + return 0; +} + +} // namespace + +int AudioprocFloatImpl(rtc::scoped_refptr audio_processing, + int argc, + char* argv[]) { + return RunSimulation( + std::move(audio_processing), /*ap_builder=*/nullptr, argc, argv, + /*input_aecdump=*/"", /*processed_capture_samples=*/nullptr); +} + +int AudioprocFloatImpl(std::unique_ptr ap_builder, + int argc, + char* argv[], + absl::string_view input_aecdump, + std::vector* processed_capture_samples) { + return RunSimulation(/*audio_processing=*/nullptr, std::move(ap_builder), + argc, argv, input_aecdump, processed_capture_samples); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h new file mode 100644 index 0000000000..5ed3aefab7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/audioproc_float_impl.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIOPROC_FLOAT_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_TEST_AUDIOPROC_FLOAT_IMPL_H_ + +#include + +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { +namespace test { + +// This function implements the audio processing simulation utility. Pass +// `input_aecdump` to provide the content of an AEC dump file as a string; if +// `input_aecdump` is not passed, a WAV or AEC input dump file must be specified +// via the `argv` argument. Pass `processed_capture_samples` to write in it the +// samples processed on the capture side; if `processed_capture_samples` is not +// passed, the output file can optionally be specified via the `argv` argument. +// Any audio_processing object specified in the input is used for the +// simulation. Note that when the audio_processing object is specified all +// functionality that relies on using the internal builder is deactivated, +// since the AudioProcessing object is already created and the builder is not +// used in the simulation. +int AudioprocFloatImpl(rtc::scoped_refptr audio_processing, + int argc, + char* argv[]); + +// This function implements the audio processing simulation utility. Pass +// `input_aecdump` to provide the content of an AEC dump file as a string; if +// `input_aecdump` is not passed, a WAV or AEC input dump file must be specified +// via the `argv` argument. Pass `processed_capture_samples` to write in it the +// samples processed on the capture side; if `processed_capture_samples` is not +// passed, the output file can optionally be specified via the `argv` argument. +int AudioprocFloatImpl(std::unique_ptr ap_builder, + int argc, + char* argv[], + absl::string_view input_aecdump, + std::vector* processed_capture_samples); + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_AUDIOPROC_FLOAT_IMPL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.cc b/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.cc new file mode 100644 index 0000000000..0464345364 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.cc @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/test/bitexactness_tools.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "api/array_view.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+
+std::string GetApmRenderTestVectorFileName(int sample_rate_hz) {
+  switch (sample_rate_hz) {
+    case 8000:
+      return ResourcePath("far8_stereo", "pcm");
+    case 16000:
+      return ResourcePath("far16_stereo", "pcm");
+    case 32000:
+      return ResourcePath("far32_stereo", "pcm");
+    case 48000:
+      return ResourcePath("far48_stereo", "pcm");
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+  return "";
+}
+
+std::string GetApmCaptureTestVectorFileName(int sample_rate_hz) {
+  switch (sample_rate_hz) {
+    case 8000:
+      return ResourcePath("near8_stereo", "pcm");
+    case 16000:
+      return ResourcePath("near16_stereo", "pcm");
+    case 32000:
+      return ResourcePath("near32_stereo", "pcm");
+    case 48000:
+      return ResourcePath("near48_stereo", "pcm");
+    default:
+      RTC_DCHECK_NOTREACHED();
+  }
+  return "";
+}
+
+void ReadFloatSamplesFromStereoFile(size_t samples_per_channel,
+                                    size_t num_channels,
+                                    InputAudioFile* stereo_pcm_file,
+                                    rtc::ArrayView<float> data) {
+  RTC_DCHECK_LE(num_channels, 2);
+  RTC_DCHECK_EQ(data.size(), samples_per_channel * num_channels);
+  std::vector<int16_t> read_samples(samples_per_channel * 2);
+  stereo_pcm_file->Read(samples_per_channel * 2, read_samples.data());
+
+  // Convert samples to float and discard any channels not needed.
+  for (size_t sample = 0; sample < samples_per_channel; ++sample) {
+    for (size_t channel = 0; channel < num_channels; ++channel) {
+      data[sample * num_channels + channel] =
+          read_samples[sample * 2 + channel] / 32768.0f;
+    }
+  }
+}
+
+::testing::AssertionResult VerifyDeinterleavedArray(
+    size_t samples_per_channel,
+    size_t num_channels,
+    rtc::ArrayView<const float> reference,
+    rtc::ArrayView<const float> output,
+    float element_error_bound) {
+  // Form vectors to compare the reference to. Only the first values of the
+  // outputs are compared, to avoid having to specify all the preceding frames
+  // as test vectors.
+  const size_t reference_frame_length =
+      rtc::CheckedDivExact(reference.size(), num_channels);
+
+  std::vector<float> output_to_verify;
+  for (size_t channel_no = 0; channel_no < num_channels; ++channel_no) {
+    output_to_verify.insert(output_to_verify.end(),
+                            output.begin() + channel_no * samples_per_channel,
+                            output.begin() + channel_no * samples_per_channel +
+                                reference_frame_length);
+  }
+
+  return VerifyArray(reference, output_to_verify, element_error_bound);
+}
+
+::testing::AssertionResult VerifyArray(rtc::ArrayView<const float> reference,
+                                       rtc::ArrayView<const float> output,
+                                       float element_error_bound) {
+  // The vectors are deemed to be bitexact only if
+  // a) the output has a size at least as large as the reference, and
+  // b) the samples in the reference are bitexact with the corresponding
+  //    samples in the output.
+
+  bool equal = true;
+  if (output.size() < reference.size()) {
+    equal = false;
+  } else {
+    // Compare the first samples in the vectors.
+    for (size_t k = 0; k < reference.size(); ++k) {
+      if (fabs(output[k] - reference[k]) > element_error_bound) {
+        equal = false;
+        break;
+      }
+    }
+  }
+
+  if (equal) {
+    return ::testing::AssertionSuccess();
+  }
+
+  // Lambda function that produces a formatted string with the data in the
+  // vector.
+  auto print_vector_in_c_format = [](rtc::ArrayView<const float> v,
+                                     size_t num_values_to_print) {
+    std::string s = "{ ";
+    for (size_t k = 0; k < std::min(num_values_to_print, v.size()); ++k) {
+      s += std::to_string(v[k]) + "f";
+      s += (k < (num_values_to_print - 1)) ? ", " : "";
+    }
+    return s + " }";
+  };
+
+  // If the vectors are deemed not to be similar, return a report of the
+  // difference.
+  return ::testing::AssertionFailure()
+         << std::endl
+         << "  Actual values : "
+         << print_vector_in_c_format(output,
+                                     std::min(output.size(), reference.size()))
+         << std::endl
+         << "  Expected values: "
+         << print_vector_in_c_format(reference, reference.size()) << std::endl;
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.h b/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.h
new file mode 100644
index 0000000000..2d3113276d
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/bitexactness_tools.h
@@ -0,0 +1,56 @@
+
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_BITEXACTNESS_TOOLS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_BITEXACTNESS_TOOLS_H_
+
+#include <string>
+
+#include "api/array_view.h"
+#include "modules/audio_coding/neteq/tools/input_audio_file.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+
+// Returns the test vector to use for the render signal in an
+// APM bitexactness test.
+std::string GetApmRenderTestVectorFileName(int sample_rate_hz);
+
+// Returns the test vector to use for the capture signal in an
+// APM bitexactness test.
+std::string GetApmCaptureTestVectorFileName(int sample_rate_hz);
+
+// Extracts float samples of up to two channels from a pcm file.
+void ReadFloatSamplesFromStereoFile(size_t samples_per_channel,
+                                    size_t num_channels,
+                                    InputAudioFile* stereo_pcm_file,
+                                    rtc::ArrayView<float> data);
+
+// Verifies a frame against a reference and returns the results as an
+// AssertionResult.
+::testing::AssertionResult VerifyDeinterleavedArray(
+    size_t samples_per_channel,
+    size_t num_channels,
+    rtc::ArrayView<const float> reference,
+    rtc::ArrayView<const float> output,
+    float element_error_bound);
+
+// Verifies a vector against a reference and returns the results as an
+// AssertionResult.
+::testing::AssertionResult VerifyArray(rtc::ArrayView<const float> reference,
+                                       rtc::ArrayView<const float> output,
+                                       float element_error_bound);
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_BITEXACTNESS_TOOLS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
new file mode 100644
index 0000000000..2c3678092e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/BUILD.gn
@@ -0,0 +1,81 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.
All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../../webrtc.gni") + +if (!build_with_chromium) { + group("conversational_speech") { + testonly = true + deps = [ ":conversational_speech_generator" ] + } + + rtc_executable("conversational_speech_generator") { + testonly = true + sources = [ "generator.cc" ] + deps = [ + ":lib", + "../../../../test:fileutils", + "../../../../test:test_support", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } +} + +rtc_library("lib") { + testonly = true + sources = [ + "config.cc", + "config.h", + "multiend_call.cc", + "multiend_call.h", + "simulator.cc", + "simulator.h", + "timing.cc", + "timing.h", + "wavreader_abstract_factory.h", + "wavreader_factory.cc", + "wavreader_factory.h", + "wavreader_interface.h", + ] + deps = [ + "../../../../api:array_view", + "../../../../common_audio", + "../../../../rtc_base:checks", + "../../../../rtc_base:logging", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base:stringutils", + "../../../../test:fileutils", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + visibility = [ ":*" ] # Only targets in this file can depend on this. +} + +rtc_library("unittest") { + testonly = true + sources = [ + "generator_unittest.cc", + "mock_wavreader.cc", + "mock_wavreader.h", + "mock_wavreader_factory.cc", + "mock_wavreader_factory.h", + ] + deps = [ + ":lib", + "../../../../api:array_view", + "../../../../common_audio", + "../../../../rtc_base:logging", + "../../../../test:fileutils", + "../../../../test:test_support", + "//testing/gtest", + ] + absl_deps = [ + "//third_party/abseil-cpp/absl/strings", + "//third_party/abseil-cpp/absl/types:optional", + ] +} diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS new file mode 100644 index 0000000000..07cff405e6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/OWNERS @@ -0,0 +1,3 @@ +alessiob@webrtc.org +henrik.lundin@webrtc.org +peah@webrtc.org diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md new file mode 100644 index 0000000000..0fa66669e6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/README.md @@ -0,0 +1,74 @@ +# Conversational Speech generator tool + +Tool to generate multiple-end audio tracks to simulate conversational speech +with two or more participants. + +The input to the tool is a directory containing a number of audio tracks and +a text file indicating how to time the sequence of speech turns (see the Example +section). + +Since the timing of the speaking turns is specified by the user, the generated +tracks may not be suitable for testing scenarios in which there is unpredictable +network delay (e.g., end-to-end RTC assessment). + +Instead, the generated pairs can be used when the delay is constant (obviously +including the case in which there is no delay). +For instance, echo cancellation in the APM module can be evaluated using two-end +audio tracks as input and reverse input. + +By indicating negative and positive time offsets, one can reproduce cross-talk +(aka double-talk) and silence in the conversation. 
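+
+For instance, assuming two 1000 ms tracks a1 and b1 (hypothetical file names),
+the minimal timing file below makes speaker B start 200 ms before speaker A
+finishes, producing 200 ms of cross-talk; the Example section below walks
+through a longer schedule:
+
+```
+A a1 0
+B b1 -200
+```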
+
+### Example
+
+For each end, there is a set of audio tracks, e.g., a1, a2 and a3 (speaker A)
+and b1, b2 (speaker B).
+The text file with the timing information may look like this:
+
+```
+A a1 0
+B b1 0
+A a2 100
+B b2 -200
+A a3 0
+A a4 0
+```
+
+The first column indicates the speaker name, the second contains the audio
+track file names, and the third the offsets (in milliseconds) used to
+concatenate the chunks. An optional fourth column contains positive or negative
+integral gains in dB that will be applied to the tracks. It's possible to
+specify the gain for some turns but not for others. If the gain is left out,
+no gain is applied.
+
+Assume that all the audio tracks in the example above are 1000 ms long.
+The tool will then generate two tracks (A and B) that look like this:
+
+**Track A**
+```
+  a1 (1000 ms)
+  silence (1100 ms)
+  a2 (1000 ms)
+  silence (800 ms)
+  a3 (1000 ms)
+  a4 (1000 ms)
+```
+
+**Track B**
+```
+  silence (1000 ms)
+  b1 (1000 ms)
+  silence (900 ms)
+  b2 (1000 ms)
+  silence (2000 ms)
+```
+
+The two tracks can also be visualized as follows (one character represents
+100 ms, "." is silence and "*" is speech).
+
+```
+t: 0         1         2         3         4         5         6 (s)
+A: **********...........**********........********************
+B: ..........**********.........**********....................
+                                ^ 200 ms cross-talk
+            100 ms silence ^
+```
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc
new file mode 100644
index 0000000000..76d3de8108
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/config.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+const std::string& Config::audiotracks_path() const {
+  return audiotracks_path_;
+}
+
+const std::string& Config::timing_filepath() const {
+  return timing_filepath_;
+}
+
+const std::string& Config::output_path() const {
+  return output_path_;
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h
new file mode 100644
index 0000000000..5a847e06a2
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/config.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_
+
+#include <string>
+
+#include "absl/strings/string_view.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+struct Config {
+  Config(absl::string_view audiotracks_path,
+         absl::string_view timing_filepath,
+         absl::string_view output_path)
+      : audiotracks_path_(audiotracks_path),
+        timing_filepath_(timing_filepath),
+        output_path_(output_path) {}
+
+  const std::string& audiotracks_path() const;
+  const std::string& timing_filepath() const;
+  const std::string& output_path() const;
+
+  const std::string audiotracks_path_;
+  const std::string timing_filepath_;
+  const std::string output_path_;
+};
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc
new file mode 100644
index 0000000000..d0bc2f2319
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "modules/audio_processing/test/conversational_speech/config.h"
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h"
+#include "rtc_base/checks.h"
+#include "test/testsupport/file_utils.h"
+
+ABSL_FLAG(std::string, i, "", "Directory containing the speech turn wav files");
+ABSL_FLAG(std::string, t, "", "Path to the timing text file");
+ABSL_FLAG(std::string, o, "", "Output wav files destination path");
+
+namespace webrtc {
+namespace test {
+namespace {
+
+const char kUsageDescription[] =
+    "Usage: conversational_speech_generator\n"
+    "          -i <path/to/source/audiotracks>\n"
+    "          -t <path/to/timing_file.txt>\n"
+    "          -o <output/path>\n"
+    "\n\n"
+    "Command-line tool to generate multiple-end audio tracks to simulate "
+    "conversational speech with two or more participants.\n";
+
+}  // namespace
+
+int main(int argc, char* argv[]) {
+  std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+  if (args.size() != 1) {
+    printf("%s", kUsageDescription);
+    return 1;
+  }
+  RTC_CHECK(DirExists(absl::GetFlag(FLAGS_i)));
+  RTC_CHECK(FileExists(absl::GetFlag(FLAGS_t)));
+  RTC_CHECK(DirExists(absl::GetFlag(FLAGS_o)));
+
+  conversational_speech::Config config(
+      absl::GetFlag(FLAGS_i), absl::GetFlag(FLAGS_t), absl::GetFlag(FLAGS_o));
+
+  // Load timing.
+  std::vector<conversational_speech::Turn> timing =
+      conversational_speech::LoadTiming(config.timing_filepath());
+
+  // Parse timing and audio tracks.
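+  // MultiEndCall checks the setup while parsing it: all tracks must share one
+  // sample rate and be mono, and the offsets must describe a feasible turn
+  // ordering (see MultiEndCall::valid()).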
+  auto wavreader_factory =
+      std::make_unique<conversational_speech::WavReaderFactory>();
+  conversational_speech::MultiEndCall multiend_call(
+      timing, config.audiotracks_path(), std::move(wavreader_factory));
+
+  // Generate output audio tracks.
+  auto generated_audiotrack_pairs =
+      conversational_speech::Simulate(multiend_call, config.output_path());
+
+  // Show paths to created audio tracks.
+  std::cout << "Output files:" << std::endl;
+  for (const auto& output_paths_entry : *generated_audiotrack_pairs) {
+    std::cout << "  speaker: " << output_paths_entry.first << std::endl;
+    std::cout << "    near end: " << output_paths_entry.second.near_end
+              << std::endl;
+    std::cout << "    far end: " << output_paths_entry.second.far_end
+              << std::endl;
+  }
+
+  return 0;
+}
+
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
new file mode 100644
index 0000000000..17714440d4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc
@@ -0,0 +1,675 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file consists of unit tests for webrtc::test::conversational_speech
+// members. Some of them focus on accepting or rejecting different
+// conversational speech setups. A setup is defined by a set of audio tracks
+// and timing information.
+// The docstring at the beginning of each TEST(ConversationalSpeechTest,
+// MultiEndCallSetup*) function looks like the drawing below and indicates
+// which setup is tested.
+//
+//    Accept:
+//    A 0****.....
+//    B .....1****
+//
+// The drawing indicates the following:
+// - the illustrated setup should be accepted,
+// - there are two speakers (namely, A and B),
+// - A is the first speaking, B is the second one,
+// - each character after the speaker's letter indicates a time unit (e.g., 100
+//   ms),
+// - "*" indicates speaking, "." listening,
+// - numbers indicate the turn index in std::vector<Turn>.
+//
+// Note that the same speaker can appear in multiple lines in order to depict
+// cases in which there are wrong offsets leading to self cross-talk (which is
+// rejected).
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include <math.h>
+
+#include <cmath>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/test/conversational_speech/config.h"
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h"
+#include "rtc_base/logging.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::LoadTiming;
+using conversational_speech::MockWavReaderFactory;
+using conversational_speech::MultiEndCall;
+using conversational_speech::SaveTiming;
+using conversational_speech::Turn;
+using conversational_speech::WavReaderFactory;
+
+const char* const audiotracks_path = "/path/to/audiotracks";
+const char* const timing_filepath = "/path/to/timing_file.txt";
+const char* const output_path = "/path/to/output_dir";
+
+const std::vector<Turn> expected_timing = {
+    {"A", "a1", 0, 0},    {"B", "b1", 0, 0}, {"A", "a2", 100, 0},
+    {"B", "b2", -200, 0}, {"A", "a3", 0, 0}, {"A", "a3", 0, 0},
+};
+const std::size_t kNumberOfTurns = expected_timing.size();
+
+// Default arguments for the MockWavReaderFactory ctor.
+// Fake audio track parameters.
+constexpr int kDefaultSampleRate = 48000;
+const std::map<std::string, const MockWavReaderFactory::Params>
+    kDefaultMockWavReaderFactoryParamsMap = {
+        {"t300", {kDefaultSampleRate, 1u, 14400u}},   // Mono, 0.3 seconds.
+        {"t500", {kDefaultSampleRate, 1u, 24000u}},   // Mono, 0.5 seconds.
+        {"t1000", {kDefaultSampleRate, 1u, 48000u}},  // Mono, 1.0 seconds.
+        {"sr8000", {8000, 1u, 8000u}},     // 8kHz sample rate, mono, 1 second.
+        {"sr16000", {16000, 1u, 16000u}},  // 16kHz sample rate, mono, 1 second.
+        {"sr16000_stereo", {16000, 2u, 16000u}},  // Like sr16000, but stereo.
+};
+const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =
+    kDefaultMockWavReaderFactoryParamsMap.at("t500");
+
+std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {
+  return std::unique_ptr<MockWavReaderFactory>(
+      new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+                               kDefaultMockWavReaderFactoryParamsMap));
+}
+
+void CreateSineWavFile(absl::string_view filepath,
+                       const MockWavReaderFactory::Params& params,
+                       float frequency = 440.0f) {
+  // Create samples.
+  constexpr double two_pi = 2.0 * M_PI;
+  std::vector<int16_t> samples(params.num_samples);
+  for (std::size_t i = 0; i < params.num_samples; ++i) {
+    // TODO(alessiob): the produced tone is not pure, improve.
+    samples[i] = std::lround(
+        32767.0f * std::sin(two_pi * i * frequency / params.sample_rate));
+  }
+
+  // Write samples.
+  WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);
+  wav_writer.WriteSamples(samples.data(), params.num_samples);
+}
+
+// Parameters to generate audio tracks with CreateSineWavFile.
+struct SineAudioTrackParams {
+  MockWavReaderFactory::Params params;
+  float frequency;
+};
+
+// Creates a temporary directory in which sine audio tracks are written.
+std::string CreateTemporarySineAudioTracks(
+    const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {
+  // Create temporary directory.
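+  // The directory is created under OutputPath(); tests that use it are
+  // expected to remove it when done, e.g. via DeleteFolderAndContents().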
+  std::string temp_directory =
+      OutputPath() + "TempConversationalSpeechAudioTracks";
+  CreateDir(temp_directory);
+
+  // Create sine tracks.
+  for (const auto& it : sine_tracks_params) {
+    const std::string temp_filepath = JoinFilename(temp_directory, it.first);
+    CreateSineWavFile(temp_filepath, it.second.params, it.second.frequency);
+  }
+
+  return temp_directory;
+}
+
+void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,
+                           absl::string_view filepath,
+                           const MockWavReaderFactory::Params& expected_params) {
+  auto wav_reader = wav_reader_factory.Create(filepath);
+  EXPECT_EQ(expected_params.sample_rate, wav_reader->SampleRate());
+  EXPECT_EQ(expected_params.num_channels, wav_reader->NumChannels());
+  EXPECT_EQ(expected_params.num_samples, wav_reader->NumSamples());
+}
+
+void DeleteFolderAndContents(absl::string_view dir) {
+  if (!DirExists(dir)) {
+    return;
+  }
+  absl::optional<std::vector<std::string>> dir_content = ReadDirectory(dir);
+  EXPECT_TRUE(dir_content);
+  for (const auto& path : *dir_content) {
+    if (DirExists(path)) {
+      DeleteFolderAndContents(path);
+    } else if (FileExists(path)) {
+      // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.
+      RemoveFile(path);
+    } else {
+      FAIL();
+    }
+  }
+  // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.
+  RemoveDir(dir);
+}
+
+}  // namespace
+
+using ::testing::_;
+
+TEST(ConversationalSpeechTest, Settings) {
+  const conversational_speech::Config config(audiotracks_path, timing_filepath,
+                                             output_path);
+
+  // Test getters.
+  EXPECT_EQ(audiotracks_path, config.audiotracks_path());
+  EXPECT_EQ(timing_filepath, config.timing_filepath());
+  EXPECT_EQ(output_path, config.output_path());
+}
+
+TEST(ConversationalSpeechTest, TimingSaveLoad) {
+  // Save test timing.
+  const std::string temporary_filepath =
+      TempFilename(OutputPath(), "TempTimingTestFile");
+  SaveTiming(temporary_filepath, expected_timing);
+
+  // Create a std::vector<Turn> instance by loading from file.
+  std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);
+  RemoveFile(temporary_filepath);
+
+  // Check size.
+  EXPECT_EQ(expected_timing.size(), actual_timing.size());
+
+  // Check Turn instances.
+  for (size_t index = 0; index < expected_timing.size(); ++index) {
+    EXPECT_EQ(expected_timing[index], actual_timing[index])
+        << "turn #" << index << " not matching";
+  }
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallCreate) {
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are 5 unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5);
+
+  // Inject the mock wav reader factory.
+  conversational_speech::MultiEndCall multiend_call(
+      expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(6u, multiend_call.speaking_turns().size());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
+  const std::vector<Turn> timing = {
+      {"A", "sr8000", 0, 0},
+      {"B", "sr16000", 0, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
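+  // (sr8000 and sr16000.) The mismatching sample rates must make the call
+  // setup invalid.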
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  MultiEndCall multiend_call(timing, audiotracks_path,
+                             std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
+  const std::vector<Turn> timing = {
+      {"A", "sr16000_stereo", 0, 0},
+      {"B", "sr16000_stereo", 0, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  MultiEndCall multiend_call(timing, audiotracks_path,
+                             std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest,
+     MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {
+  const std::vector<Turn> timing = {
+      {"A", "sr8000", 0, 0},
+      {"B", "sr16000_stereo", 0, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  MultiEndCall multiend_call(timing, audiotracks_path,
+                             std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
+  const std::vector<Turn> timing = {
+      {"A", "t500", -100, 0},
+      {"B", "t500", 0, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupSimple) {
+  // Accept:
+  // A 0****.....
+  // B .....1****
+  constexpr std::size_t expected_duration = kDefaultSampleRate;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0, 0},
+      {"B", "t500", 0, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupPause) {
+  // Accept:
+  // A 0****.......
+  // B .......1****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0, 0},
+      {"B", "t500", 200, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
+  // Accept:
+  // A 0****....
+  // B ....1****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0, 0},
+      {"B", "t500", -100, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
+  // Reject:
+  // A ..0****
+  // B .1****.  The n-th turn cannot start before the (n-1)-th one.
+  const std::vector<Turn> timing = {
+      {"A", "t500", 200, 0},
+      {"B", "t500", -600, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
+  // Accept:
+  // A 0****2****...
+  // B ...1*********
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0, 0},
+      {"B", "t1000", -200, 0},
+      {"A", "t500", -800, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
+  // Reject:
+  // A 0****......
+  // A ...1****...
+  // B ......2****
+  //      ^ Turn #1 overlaps with #0 which is from the same speaker.
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0, 0},
+      {"A", "t500", -200, 0},
+      {"B", "t500", -200, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
+  // Reject:
+  // A 0*********
+  // B 1**.......
+  // C ...2**....
+  // A ......3**.
+  //         ^ Turn #3 overlaps with #0 which is from the same speaker.
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0, 0},
+      {"B", "t300", -1000, 0},
+      {"C", "t300", 0, 0},
+      {"A", "t300", 0, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
+  // Accept:
+  // A 0*********..
+  // B ..1****.....
+  // C .......2****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0, 0},
+      {"B", "t500", -800, 0},
+      {"C", "t500", 0, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(3u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
+  // Reject:
+  // A 0*********
+  // B ..1****...
+  // C ....2****.
+  //       ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers
+  //         not permitted).
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0, 0},
+      {"B", "t500", -800, 0},
+      {"C", "t500", -300, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
+  // Accept:
+  // A 0*********..
+  // B .1****......
+  // C .......2****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0, 0},
+      {"B", "t500", -900, 0},
+      {"C", "t500", 100, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(3u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
+  // Accept:
+  // A 0****
+  // B 1****
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0, 0},
+      {"B", "t500", -500, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
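+  // (No duration check here: the two turns fully overlap, so the total
+  // duration equals a single 500 ms turn.)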
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
+  // Accept:
+  // A 0****....3****.5**.
+  // B .....1****...4**...
+  // C ......2**.......6**
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0, 0},    {"B", "t500", 0, 0},    {"C", "t300", -400, 0},
+      {"A", "t500", 0, 0},    {"B", "t300", -100, 0}, {"A", "t300", -100, 0},
+      {"C", "t300", -200, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(3u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(7u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
+  // Reject:
+  // A 0****....3****5**.
+  // B .....1****...4**..
+  // C ......2**......6**
+  //                ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+
+  //                  speakers not permitted).
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0, 0},    {"B", "t500", 0, 0},    {"C", "t300", -400, 0},
+      {"A", "t500", 0, 0},    {"B", "t300", -100, 0}, {"A", "t300", -200, 0},
+      {"C", "t300", -200, 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
+  // Parameters with which wav files are created.
+  constexpr int duration_seconds = 5;
+  const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000};
+
+  for (int sample_rate : sample_rates) {
+    const std::string temp_filename = OutputPath() + "TempSineWavFile_" +
+                                      std::to_string(sample_rate) + ".wav";
+
+    // Write wav file.
+    const std::size_t num_samples = duration_seconds * sample_rate;
+    MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};
+    CreateSineWavFile(temp_filename, params);
+
+    // Load wav file and check if params match.
+    WavReaderFactory wav_reader_factory;
+    MockWavReaderFactory::Params expected_params = {sample_rate, 1u,
+                                                    num_samples};
+    CheckAudioTrackParams(wav_reader_factory, temp_filename, expected_params);
+
+    // Clean up.
+    RemoveFile(temp_filename);
+  }
+}
+
+TEST(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {
+  // Simulated call (one character corresponding to 500 ms):
+  // A 0*********...........2*********.....
+  // B ...........1*********.....3*********
+  const std::vector<Turn> expected_timing = {
+      {"A", "t5000_440.wav", 0, 0},
+      {"B", "t5000_880.wav", 500, 0},
+      {"A", "t5000_440.wav", 0, 0},
+      {"B", "t5000_880.wav", -2500, 0},
+  };
+  const std::size_t expected_duration_seconds = 18;
+
+  // Create temporary audio track files.
+  const int sample_rate = 16000;
+  const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {
+      {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},
+      {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},
+  };
+  const std::string audiotracks_path =
+      CreateTemporarySineAudioTracks(sine_tracks_params);
+
+  // Set up the multi-end call.
+  auto wavreader_factory =
+      std::unique_ptr<WavReaderFactory>(new WavReaderFactory());
+  MultiEndCall multiend_call(expected_timing, audiotracks_path,
+                             std::move(wavreader_factory));
+
+  // Simulate the call.
+  std::string output_path = JoinFilename(audiotracks_path, "output");
+  CreateDir(output_path);
+  RTC_LOG(LS_VERBOSE) << "simulator output path: " << output_path;
+  auto generated_audiotrack_pairs =
+      conversational_speech::Simulate(multiend_call, output_path);
+  EXPECT_EQ(2u, generated_audiotrack_pairs->size());
+
+  // Check the output.
+  WavReaderFactory wav_reader_factory;
+  const MockWavReaderFactory::Params expected_params = {
+      sample_rate, 1u, sample_rate * expected_duration_seconds};
+  for (const auto& it : *generated_audiotrack_pairs) {
+    RTC_LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">";
+    CheckAudioTrackParams(wav_reader_factory, it.second.near_end,
+                          expected_params);
+    CheckAudioTrackParams(wav_reader_factory, it.second.far_end,
+                          expected_params);
+  }
+
+  // Clean.
+  EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path));
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc
new file mode 100644
index 0000000000..1263e938c4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+using ::testing::Return;
+
+MockWavReader::MockWavReader(int sample_rate,
+                             size_t num_channels,
+                             size_t num_samples)
+    : sample_rate_(sample_rate),
+      num_channels_(num_channels),
+      num_samples_(num_samples) {
+  ON_CALL(*this, SampleRate()).WillByDefault(Return(sample_rate_));
+  ON_CALL(*this, NumChannels()).WillByDefault(Return(num_channels_));
+  ON_CALL(*this, NumSamples()).WillByDefault(Return(num_samples_));
+}
+
+MockWavReader::~MockWavReader() = default;
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
new file mode 100644
index 0000000000..94e20b9ec6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class MockWavReader : public WavReaderInterface {
+ public:
+  MockWavReader(int sample_rate, size_t num_channels, size_t num_samples);
+  ~MockWavReader();
+
+  // TODO(alessiob): use ON_CALL to return random samples if needed.
+  MOCK_METHOD(size_t, ReadFloatSamples, (rtc::ArrayView<float>), (override));
+  MOCK_METHOD(size_t,
+              ReadInt16Samples,
+              (rtc::ArrayView<int16_t>),
+              (override));
+
+  MOCK_METHOD(int, SampleRate, (), (const, override));
+  MOCK_METHOD(size_t, NumChannels, (), (const, override));
+  MOCK_METHOD(size_t, NumSamples, (), (const, override));
+
+ private:
+  const int sample_rate_;
+  const size_t num_channels_;
+  const size_t num_samples_;
+};
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc
new file mode 100644
index 0000000000..a377cce7e3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader.h"
+#include "rtc_base/logging.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+using ::testing::_;
+using ::testing::Invoke;
+
+MockWavReaderFactory::MockWavReaderFactory(
+    const Params& default_params,
+    const std::map<std::string, const Params>& params)
+    : default_params_(default_params), audiotrack_names_params_(params) {
+  ON_CALL(*this, Create(_))
+      .WillByDefault(Invoke(this, &MockWavReaderFactory::CreateMock));
+}
+
+MockWavReaderFactory::MockWavReaderFactory(const Params& default_params)
+    : MockWavReaderFactory(default_params,
+                           std::map<std::string, const Params>{}) {}
+
+MockWavReaderFactory::~MockWavReaderFactory() = default;
+
+std::unique_ptr<WavReaderInterface> MockWavReaderFactory::CreateMock(
+    absl::string_view filepath) {
+  // Search the parameters corresponding to filepath.
+  size_t delimiter = filepath.find_last_of("/\\");  // Either windows or posix.
+  std::string filename(filepath.substr(
+      delimiter == absl::string_view::npos ? 0 : delimiter + 1));
+  const auto it = audiotrack_names_params_.find(filename);
+
+  // If not found, use default parameters.
+  if (it == audiotrack_names_params_.end()) {
+    RTC_LOG(LS_VERBOSE) << "using default parameters for " << filepath;
+    return std::unique_ptr<WavReaderInterface>(new MockWavReader(
+        default_params_.sample_rate, default_params_.num_channels,
+        default_params_.num_samples));
+  }
+
+  // Found, use the audiotrack-specific parameters.
+  RTC_LOG(LS_VERBOSE) << "using ad-hoc parameters for " << filepath;
+  RTC_LOG(LS_VERBOSE) << "sample_rate " << it->second.sample_rate;
+  RTC_LOG(LS_VERBOSE) << "num_channels " << it->second.num_channels;
+  RTC_LOG(LS_VERBOSE) << "num_samples " << it->second.num_samples;
+  return std::unique_ptr<WavReaderInterface>(new MockWavReader(
+      it->second.sample_rate, it->second.num_channels,
+      it->second.num_samples));
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h
new file mode 100644
index 0000000000..bcc7f3069b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class MockWavReaderFactory : public WavReaderAbstractFactory {
+ public:
+  struct Params {
+    int sample_rate;
+    size_t num_channels;
+    size_t num_samples;
+  };
+
+  MockWavReaderFactory(const Params& default_params,
+                       const std::map<std::string, const Params>& params);
+  explicit MockWavReaderFactory(const Params& default_params);
+  ~MockWavReaderFactory();
+
+  MOCK_METHOD(std::unique_ptr<WavReaderInterface>,
+              Create,
+              (absl::string_view),
+              (const, override));
+
+ private:
+  // Creates a MockWavReader instance using the parameters in
+  // audiotrack_names_params_ if the entry corresponding to filepath exists,
+  // otherwise creates a MockWavReader instance using the default parameters.
+  std::unique_ptr<WavReaderInterface> CreateMock(absl::string_view filepath);
+
+  const Params& default_params_;
+  std::map<std::string, const Params> audiotrack_names_params_;
+};
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
new file mode 100644
index 0000000000..952114a78b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/logging.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+MultiEndCall::MultiEndCall(
+    rtc::ArrayView<const Turn> timing,
+    absl::string_view audiotracks_path,
+    std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)
+    : timing_(timing),
+      audiotracks_path_(audiotracks_path),
+      wavreader_abstract_factory_(std::move(wavreader_abstract_factory)),
+      valid_(false) {
+  FindSpeakerNames();
+  if (CreateAudioTrackReaders())
+    valid_ = CheckTiming();
+}
+
+MultiEndCall::~MultiEndCall() = default;
+
+void MultiEndCall::FindSpeakerNames() {
+  RTC_DCHECK(speaker_names_.empty());
+  for (const Turn& turn : timing_) {
+    speaker_names_.emplace(turn.speaker_name);
+  }
+}
+
+bool MultiEndCall::CreateAudioTrackReaders() {
+  RTC_DCHECK(audiotrack_readers_.empty());
+  sample_rate_hz_ = 0;  // Sample rate will be set when reading the first track.
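+  // One reader is created per unique audio track file; tracks with a
+  // mismatching sample rate or more than one channel invalidate the setup.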
+  for (const Turn& turn : timing_) {
+    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+    if (it != audiotrack_readers_.end())
+      continue;
+
+    const std::string audiotrack_file_path =
+        test::JoinFilename(audiotracks_path_, turn.audiotrack_file_name);
+
+    // Map the audiotrack file name to a new instance of WavReaderInterface.
+    std::unique_ptr<WavReaderInterface> wavreader =
+        wavreader_abstract_factory_->Create(audiotrack_file_path);
+
+    if (sample_rate_hz_ == 0) {
+      sample_rate_hz_ = wavreader->SampleRate();
+    } else if (sample_rate_hz_ != wavreader->SampleRate()) {
+      RTC_LOG(LS_ERROR)
+          << "All the audio tracks should have the same sample rate.";
+      return false;
+    }
+
+    if (wavreader->NumChannels() != 1) {
+      RTC_LOG(LS_ERROR) << "Only mono audio tracks supported.";
+      return false;
+    }
+
+    audiotrack_readers_.emplace(turn.audiotrack_file_name,
+                                std::move(wavreader));
+  }
+
+  return true;
+}
+
+bool MultiEndCall::CheckTiming() {
+  struct Interval {
+    size_t begin;
+    size_t end;
+  };
+  size_t number_of_turns = timing_.size();
+  auto millisecond_to_samples = [](int ms, int sr) -> int {
+    // Truncation may happen if the sampling rate is not an integer multiple
+    // of 1000 (e.g., 44100).
+    return ms * sr / 1000;
+  };
+  auto in_interval = [](size_t value, const Interval& interval) {
+    return interval.begin <= value && value < interval.end;
+  };
+  total_duration_samples_ = 0;
+  speaking_turns_.clear();
+
+  // Begin and end timestamps for the last two turns (unit: number of samples).
+  Interval second_last_turn = {0, 0};
+  Interval last_turn = {0, 0};
+
+  // Initialize map to store speaking turn indices of each speaker (used to
+  // detect self cross-talk).
+  std::map<std::string, std::vector<size_t>> speaking_turn_indices;
+  for (const std::string& speaker_name : speaker_names_) {
+    speaking_turn_indices.emplace(std::piecewise_construct,
+                                  std::forward_as_tuple(speaker_name),
+                                  std::forward_as_tuple());
+  }
+
+  // Parse turns.
+  for (size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
+    const Turn& turn = timing_[turn_index];
+    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+    RTC_CHECK(it != audiotrack_readers_.end())
+        << "Audio track reader not created";
+
+    // Begin and end timestamps for the current turn.
+    int offset_samples =
+        millisecond_to_samples(turn.offset, it->second->SampleRate());
+    std::size_t begin_timestamp = last_turn.end + offset_samples;
+    std::size_t end_timestamp = begin_timestamp + it->second->NumSamples();
+    RTC_LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp
+                     << "-" << end_timestamp << " samples";
+
+    // The order is invalid if the offset is negative and its absolute value is
+    // larger than the duration of the previous turn.
+    if (offset_samples < 0 &&
+        -offset_samples >
+            static_cast<int>(last_turn.end - last_turn.begin)) {
+      RTC_LOG(LS_ERROR) << "invalid order";
+      return false;
+    }
+
+    // Cross-talk with 3 or more speakers occurs when the beginning of the
+    // current interval falls in the last two turns.
+    if (turn_index > 1 && in_interval(begin_timestamp, last_turn) &&
+        in_interval(begin_timestamp, second_last_turn)) {
+      RTC_LOG(LS_ERROR) << "cross-talk with 3+ speakers";
+      return false;
+    }
+
+    // Append turn.
+    speaking_turns_.emplace_back(turn.speaker_name, turn.audiotrack_file_name,
+                                 begin_timestamp, end_timestamp, turn.gain);
+
+    // Save speaking turn index for self cross-talk detection.
+    RTC_DCHECK_EQ(speaking_turns_.size(), turn_index + 1);
+    speaking_turn_indices[turn.speaker_name].push_back(turn_index);
+
+    // Update total duration of the conversational speech.
+    if (total_duration_samples_ < end_timestamp)
+      total_duration_samples_ = end_timestamp;
+
+    // Update and continue with next turn.
+    second_last_turn = last_turn;
+    last_turn.begin = begin_timestamp;
+    last_turn.end = end_timestamp;
+  }
+
+  // Detect self cross-talk.
+  for (const std::string& speaker_name : speaker_names_) {
+    RTC_LOG(LS_INFO) << "checking self cross-talk for <" << speaker_name
+                     << ">";
+
+    // Copy all turns for this speaker to new vector.
+    std::vector<SpeakingTurn> speaking_turns_for_name;
+    std::copy_if(speaking_turns_.begin(), speaking_turns_.end(),
+                 std::back_inserter(speaking_turns_for_name),
+                 [&speaker_name](const SpeakingTurn& st) {
+                   return st.speaker_name == speaker_name;
+                 });
+
+    // Check for overlap between adjacent elements.
+    // This is a sufficient condition for self cross-talk since the intervals
+    // are sorted by begin timestamp.
+    auto overlap = std::adjacent_find(
+        speaking_turns_for_name.begin(), speaking_turns_for_name.end(),
+        [](const SpeakingTurn& a, const SpeakingTurn& b) {
+          return a.end > b.begin;
+        });
+
+    if (overlap != speaking_turns_for_name.end()) {
+      RTC_LOG(LS_ERROR) << "Self cross-talk detected";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
new file mode 100644
index 0000000000..63283465fa
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
+#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
+
+#include <stddef.h>
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+class MultiEndCall {
+ public:
+  struct SpeakingTurn {
+    // Constructor required in order to use std::vector::emplace_back().
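+    // (emplace_back() forwards its arguments to a constructor; before C++20
+    // it cannot aggregate-initialize, hence the explicit constructor.)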
+    SpeakingTurn(absl::string_view new_speaker_name,
+                 absl::string_view new_audiotrack_file_name,
+                 size_t new_begin,
+                 size_t new_end,
+                 int gain)
+        : speaker_name(new_speaker_name),
+          audiotrack_file_name(new_audiotrack_file_name),
+          begin(new_begin),
+          end(new_end),
+          gain(gain) {}
+    std::string speaker_name;
+    std::string audiotrack_file_name;
+    size_t begin;
+    size_t end;
+    int gain;
+  };
+
+  MultiEndCall(
+      rtc::ArrayView<const Turn> timing,
+      absl::string_view audiotracks_path,
+      std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory);
+  ~MultiEndCall();
+
+  MultiEndCall(const MultiEndCall&) = delete;
+  MultiEndCall& operator=(const MultiEndCall&) = delete;
+
+  const std::set<std::string>& speaker_names() const { return speaker_names_; }
+  const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
+  audiotrack_readers() const {
+    return audiotrack_readers_;
+  }
+  bool valid() const { return valid_; }
+  int sample_rate() const { return sample_rate_hz_; }
+  size_t total_duration_samples() const { return total_duration_samples_; }
+  const std::vector<SpeakingTurn>& speaking_turns() const {
+    return speaking_turns_;
+  }
+
+ private:
+  // Finds unique speaker names.
+  void FindSpeakerNames();
+
+  // Creates one WavReader instance for each unique audiotrack. It returns
+  // false if the audio tracks do not have the same sample rate or if they
+  // are not mono.
+  bool CreateAudioTrackReaders();
+
+  // Validates the speaking turns timing information. Accepts cross-talk, but
+  // only up to 2 speakers. Rejects unordered turns and self cross-talk.
+  bool CheckTiming();
+
+  rtc::ArrayView<const Turn> timing_;
+  std::string audiotracks_path_;
+  std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory_;
+  std::set<std::string> speaker_names_;
+  std::map<std::string, std::unique_ptr<WavReaderInterface>>
+      audiotrack_readers_;
+  bool valid_;
+  int sample_rate_hz_;
+  size_t total_duration_samples_;
+  std::vector<SpeakingTurn> speaking_turns_;
+};
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc
new file mode 100644
index 0000000000..89bcd48d84
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.cc
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+
+#include <math.h>
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::MultiEndCall;
+using conversational_speech::SpeakerOutputFilePaths;
+using conversational_speech::WavReaderInterface;
+
+// Combines output path and speaker names to define the output file paths for
+// the near-end and far-end audio tracks.
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>>
+InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names,
+ absl::string_view output_path) {
+ // Create map.
+ auto speaker_output_file_paths_map =
+ std::make_unique<std::map<std::string, SpeakerOutputFilePaths>>();
+
+ // Add near-end and far-end output paths into the map.
+ for (const auto& speaker_name : speaker_names) {
+ const std::string near_end_path =
+ test::JoinFilename(output_path, "s_" + speaker_name + "-near_end.wav");
+ RTC_LOG(LS_VERBOSE) << "The near-end audio track will be created in "
+ << near_end_path << ".";
+
+ const std::string far_end_path =
+ test::JoinFilename(output_path, "s_" + speaker_name + "-far_end.wav");
+ RTC_LOG(LS_VERBOSE) << "The far-end audio track will be created in "
+ << far_end_path << ".";
+
+ // Add to map.
+ speaker_output_file_paths_map->emplace(
+ std::piecewise_construct, std::forward_as_tuple(speaker_name),
+ std::forward_as_tuple(near_end_path, far_end_path));
+ }
+
+ return speaker_output_file_paths_map;
+}
+
+// Class that provides one WavWriter for the near-end and one for the far-end
+// output track of a speaker.
+class SpeakerWavWriters {
+ public:
+ SpeakerWavWriters(const SpeakerOutputFilePaths& output_file_paths,
+ int sample_rate)
+ : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u),
+ far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {}
+ WavWriter* near_end_wav_writer() { return &near_end_wav_writer_; }
+ WavWriter* far_end_wav_writer() { return &far_end_wav_writer_; }
+
+ private:
+ WavWriter near_end_wav_writer_;
+ WavWriter far_end_wav_writer_;
+};
+
+// Initializes one SpeakerWavWriters instance for each speaker, covering both
+// the near-end and the far-end output tracks.
+std::unique_ptr<std::map<std::string, SpeakerWavWriters>>
+InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>&
+ speaker_output_file_paths,
+ int sample_rate) {
+ // Create map.
+ auto speaker_wav_writers_map =
+ std::make_unique<std::map<std::string, SpeakerWavWriters>>();
+
+ // Add one SpeakerWavWriters instance per speaker into the map.
+ for (auto it = speaker_output_file_paths.begin();
+ it != speaker_output_file_paths.end(); ++it) {
+ speaker_wav_writers_map->emplace(
+ std::piecewise_construct, std::forward_as_tuple(it->first),
+ std::forward_as_tuple(it->second, sample_rate));
+ }
+
+ return speaker_wav_writers_map;
+}
+
+// Reads all the samples for each audio track.
+std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks(
+ const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
+ audiotrack_readers) {
+ // Create map.
+ auto audiotracks_map =
+ std::make_unique<std::map<std::string, std::vector<int16_t>>>();
+
+ // Add audio track vectors.
+ for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end();
+ ++it) {
+ // Add map entry.
+ audiotracks_map->emplace(std::piecewise_construct,
+ std::forward_as_tuple(it->first),
+ std::forward_as_tuple(it->second->NumSamples()));
+
+ // Read samples.
+ it->second->ReadInt16Samples(audiotracks_map->at(it->first));
+ }
+
+ return audiotracks_map;
+}
+
+// Writes all the values in `source_samples` via `wav_writer`. If the number of
+// previously written samples in `wav_writer` is less than `interval_begin`, it
+// adds zeros as left padding. The padding corresponds to intervals during which
+// a speaker is not active.
+void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples,
+ size_t interval_begin,
+ WavWriter* wav_writer) {
+ // Add left padding.
+ RTC_CHECK(wav_writer);
+ RTC_CHECK_GE(interval_begin, wav_writer->num_samples());
+ size_t padding_size = interval_begin - wav_writer->num_samples();
+ if (padding_size != 0) {
+ const std::vector<int16_t> padding(padding_size, 0);
+ wav_writer->WriteSamples(padding.data(), padding_size);
+ }
+
+ // Write source samples.
+ wav_writer->WriteSamples(source_samples.data(), source_samples.size());
+}
+
+// Appends zeros via `wav_writer`. The number of zeros is always non-negative
+// and equal to the difference between `pad_samples` and the number of
+// previously written samples.
+void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) {
+ RTC_CHECK(wav_writer);
+ RTC_CHECK_GE(pad_samples, wav_writer->num_samples());
+ size_t padding_size = pad_samples - wav_writer->num_samples();
+ if (padding_size != 0) {
+ const std::vector<int16_t> padding(padding_size, 0);
+ wav_writer->WriteSamples(padding.data(), padding_size);
+ }
+}
+
+void ScaleSignal(rtc::ArrayView<const int16_t> source_samples,
+ int gain,
+ rtc::ArrayView<int16_t> output_samples) {
+ const float gain_linear = DbToRatio(gain);
+ RTC_DCHECK_EQ(source_samples.size(), output_samples.size());
+ std::transform(source_samples.begin(), source_samples.end(),
+ output_samples.begin(), [gain_linear](int16_t x) -> int16_t {
+ return rtc::saturated_cast<int16_t>(x * gain_linear);
+ });
+}
+
+} // namespace
+
+namespace conversational_speech {
+
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
+ const MultiEndCall& multiend_call,
+ absl::string_view output_path) {
+ // Set output file paths and initialize wav writers.
+ const auto& speaker_names = multiend_call.speaker_names();
+ auto speaker_output_file_paths =
+ InitSpeakerOutputFilePaths(speaker_names, output_path);
+ auto speakers_wav_writers = InitSpeakersWavWriters(
+ *speaker_output_file_paths, multiend_call.sample_rate());
+
+ // Preload all the input audio tracks.
+ const auto& audiotrack_readers = multiend_call.audiotrack_readers();
+ auto audiotracks = PreloadAudioTracks(audiotrack_readers);
+
+ // TODO(alessiob): When speaker_names.size() == 2, near-end and far-end
+ // across the 2 speakers are symmetric; hence, the code below could be
+ // replaced by only creating the near-end or the far-end. However, this would
+ // require splitting the unit tests and documenting the behavior in
+ // README.md. In practice, it should not be an issue since the files are not
+ // expected to be significant in size.
+
+ // Write near-end and far-end output tracks.
+ for (const auto& speaking_turn : multiend_call.speaking_turns()) {
+ const std::string& active_speaker_name = speaking_turn.speaker_name;
+ const auto source_audiotrack =
+ audiotracks->at(speaking_turn.audiotrack_file_name);
+ std::vector<int16_t> scaled_audiotrack(source_audiotrack.size());
+ ScaleSignal(source_audiotrack, speaking_turn.gain, scaled_audiotrack);
+
+ // Write the active speaker's chunk to the active speaker's near-end.
+ PadLeftWriteChunk(
+ scaled_audiotrack, speaking_turn.begin,
+ speakers_wav_writers->at(active_speaker_name).near_end_wav_writer());
+
+ // Write the active speaker's chunk to the other participants' far-ends.
+ for (const std::string& speaker_name : speaker_names) {
+ if (speaker_name == active_speaker_name)
+ continue;
+ PadLeftWriteChunk(
+ scaled_audiotrack, speaking_turn.begin,
+ speakers_wav_writers->at(speaker_name).far_end_wav_writer());
+ }
+ }
+
+ // Finalize all the output tracks with right padding.
+ // This is required to make the durations of all the output tracks equal.
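+ // Hedged illustration with example figures: if the call lasts 48000 samples
+ // in total and a writer has so far received 40000 samples, PadRightWrite()
+ // appends 8000 trailing zeros to that track.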
+ size_t duration_samples = multiend_call.total_duration_samples(); + for (const std::string& speaker_name : speaker_names) { + PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(), + duration_samples); + PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(), + duration_samples); + } + + return speaker_output_file_paths; +} + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h new file mode 100644 index 0000000000..2f311e16b3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/simulator.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_ + +#include +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/test/conversational_speech/multiend_call.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +struct SpeakerOutputFilePaths { + SpeakerOutputFilePaths(absl::string_view new_near_end, + absl::string_view new_far_end) + : near_end(new_near_end), far_end(new_far_end) {} + // Paths to the near-end and far-end audio track files. + const std::string near_end; + const std::string far_end; +}; + +// Generates the near-end and far-end audio track pairs for each speaker. +std::unique_ptr> Simulate( + const MultiEndCall& multiend_call, + absl::string_view output_path); + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc new file mode 100644 index 0000000000..95ec9f542e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/conversational_speech/timing.h" + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "rtc_base/string_encode.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +bool Turn::operator==(const Turn& b) const { + return b.speaker_name == speaker_name && + b.audiotrack_file_name == audiotrack_file_name && b.offset == offset && + b.gain == gain; +} + +std::vector LoadTiming(absl::string_view timing_filepath) { + // Line parser. 
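+ // Each line is expected to be "<speaker> <audiotrack> <offset> [gain]",
+ // e.g. "A a1.wav 0 5" (example values); the gain field is optional and
+ // defaults to 0, as implemented below.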
+ auto parse_line = [](absl::string_view line) { + std::vector fields = rtc::split(line, ' '); + RTC_CHECK_GE(fields.size(), 3); + RTC_CHECK_LE(fields.size(), 4); + int gain = 0; + if (fields.size() == 4) { + gain = rtc::StringToNumber(fields[3]).value_or(0); + } + return Turn(fields[0], fields[1], + rtc::StringToNumber(fields[2]).value_or(0), gain); + }; + + // Init. + std::vector timing; + + // Parse lines. + std::string line; + std::ifstream infile(std::string{timing_filepath}); + while (std::getline(infile, line)) { + if (line.empty()) + continue; + timing.push_back(parse_line(line)); + } + infile.close(); + + return timing; +} + +void SaveTiming(absl::string_view timing_filepath, + rtc::ArrayView timing) { + std::ofstream outfile(std::string{timing_filepath}); + RTC_CHECK(outfile.is_open()); + for (const Turn& turn : timing) { + outfile << turn.speaker_name << " " << turn.audiotrack_file_name << " " + << turn.offset << " " << turn.gain << std::endl; + } + outfile.close(); +} + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h new file mode 100644 index 0000000000..9314f6fc43 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/timing.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "api/array_view.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +struct Turn { + Turn(absl::string_view new_speaker_name, + absl::string_view new_audiotrack_file_name, + int new_offset, + int gain) + : speaker_name(new_speaker_name), + audiotrack_file_name(new_audiotrack_file_name), + offset(new_offset), + gain(gain) {} + bool operator==(const Turn& b) const; + std::string speaker_name; + std::string audiotrack_file_name; + int offset; + int gain; +}; + +// Loads a list of turns from a file. +std::vector LoadTiming(absl::string_view timing_filepath); + +// Writes a list of turns into a file. +void SaveTiming(absl::string_view timing_filepath, + rtc::ArrayView timing); + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h new file mode 100644 index 0000000000..14ddfc7539 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_ + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class WavReaderAbstractFactory { + public: + virtual ~WavReaderAbstractFactory() = default; + virtual std::unique_ptr Create( + absl::string_view filepath) const = 0; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc new file mode 100644 index 0000000000..99b1686484 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h" + +#include + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "common_audio/wav_file.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { +namespace { + +using conversational_speech::WavReaderInterface; + +class WavReaderAdaptor final : public WavReaderInterface { + public: + explicit WavReaderAdaptor(absl::string_view filepath) + : wav_reader_(filepath) {} + ~WavReaderAdaptor() override = default; + + size_t ReadFloatSamples(rtc::ArrayView samples) override { + return wav_reader_.ReadSamples(samples.size(), samples.begin()); + } + + size_t ReadInt16Samples(rtc::ArrayView samples) override { + return wav_reader_.ReadSamples(samples.size(), samples.begin()); + } + + int SampleRate() const override { return wav_reader_.sample_rate(); } + + size_t NumChannels() const override { return wav_reader_.num_channels(); } + + size_t NumSamples() const override { return wav_reader_.num_samples(); } + + private: + WavReader wav_reader_; +}; + +} // namespace + +namespace conversational_speech { + +WavReaderFactory::WavReaderFactory() = default; + +WavReaderFactory::~WavReaderFactory() = default; + +std::unique_ptr WavReaderFactory::Create( + absl::string_view filepath) const { + return std::unique_ptr(new WavReaderAdaptor(filepath)); +} + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h new file mode 100644 index 0000000000..f2e5b61055 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_ + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class WavReaderFactory : public WavReaderAbstractFactory { + public: + WavReaderFactory(); + ~WavReaderFactory() override; + std::unique_ptr Create( + absl::string_view filepath) const override; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h new file mode 100644 index 0000000000..c74f639461 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_ + +#include + +#include "api/array_view.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class WavReaderInterface { + public: + virtual ~WavReaderInterface() = default; + + // Returns the number of samples read. + virtual size_t ReadFloatSamples(rtc::ArrayView samples) = 0; + virtual size_t ReadInt16Samples(rtc::ArrayView samples) = 0; + + // Getters. + virtual int SampleRate() const = 0; + virtual size_t NumChannels() const = 0; + virtual size_t NumSamples() const = 0; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.cc b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.cc new file mode 100644 index 0000000000..2f483f5077 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.cc @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/test/debug_dump_replayer.h"
+
+#include
+
+#include "absl/strings/string_view.h"
+#include "modules/audio_processing/test/audio_processing_builder_for_testing.h"
+#include "modules/audio_processing/test/protobuf_utils.h"
+#include "modules/audio_processing/test/runtime_setting_util.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+
+void MaybeResetBuffer(std::unique_ptr<ChannelBuffer<float>>* buffer,
+ const StreamConfig& config) {
+ auto& buffer_ref = *buffer;
+ if (!buffer_ref.get() || buffer_ref->num_frames() != config.num_frames() ||
+ buffer_ref->num_channels() != config.num_channels()) {
+ buffer_ref.reset(
+ new ChannelBuffer<float>(config.num_frames(), config.num_channels()));
+ }
+}
+
+} // namespace
+
+DebugDumpReplayer::DebugDumpReplayer()
+ : input_(nullptr), // will be created upon usage.
+ reverse_(nullptr),
+ output_(nullptr),
+ apm_(nullptr),
+ debug_file_(nullptr) {}
+
+DebugDumpReplayer::~DebugDumpReplayer() {
+ if (debug_file_)
+ fclose(debug_file_);
+}
+
+bool DebugDumpReplayer::SetDumpFile(absl::string_view filename) {
+ debug_file_ = fopen(std::string(filename).c_str(), "rb");
+ LoadNextMessage();
+ return debug_file_;
+}
+
+// Returns the next event that has not been replayed yet.
+absl::optional<audioproc::Event> DebugDumpReplayer::GetNextEvent() const {
+ if (!has_next_event_)
+ return absl::nullopt;
+ else
+ return next_event_;
+}
+
+// Runs the next event. Returns true if it succeeded.
+bool DebugDumpReplayer::RunNextEvent() {
+ if (!has_next_event_)
+ return false;
+ switch (next_event_.type()) {
+ case audioproc::Event::INIT:
+ OnInitEvent(next_event_.init());
+ break;
+ case audioproc::Event::STREAM:
+ OnStreamEvent(next_event_.stream());
+ break;
+ case audioproc::Event::REVERSE_STREAM:
+ OnReverseStreamEvent(next_event_.reverse_stream());
+ break;
+ case audioproc::Event::CONFIG:
+ OnConfigEvent(next_event_.config());
+ break;
+ case audioproc::Event::RUNTIME_SETTING:
+ OnRuntimeSettingEvent(next_event_.runtime_setting());
+ break;
+ case audioproc::Event::UNKNOWN_EVENT:
+ // We do not expect to receive an UNKNOWN event.
+ RTC_CHECK_NOTREACHED();
+ }
+ LoadNextMessage();
+ return true;
+}
+
+const ChannelBuffer<float>* DebugDumpReplayer::GetOutput() const {
+ return output_.get();
+}
+
+StreamConfig DebugDumpReplayer::GetOutputConfig() const {
+ return output_config_;
+}
+
+// OnInitEvent resets the input/output/reverse channel format.
+void DebugDumpReplayer::OnInitEvent(const audioproc::Init& msg) {
+ RTC_CHECK(msg.has_num_input_channels());
+ RTC_CHECK(msg.has_output_sample_rate());
+ RTC_CHECK(msg.has_num_output_channels());
+ RTC_CHECK(msg.has_reverse_sample_rate());
+ RTC_CHECK(msg.has_num_reverse_channels());
+
+ input_config_ = StreamConfig(msg.sample_rate(), msg.num_input_channels());
+ output_config_ =
+ StreamConfig(msg.output_sample_rate(), msg.num_output_channels());
+ reverse_config_ =
+ StreamConfig(msg.reverse_sample_rate(), msg.num_reverse_channels());
+
+ MaybeResetBuffer(&input_, input_config_);
+ MaybeResetBuffer(&output_, output_config_);
+ MaybeResetBuffer(&reverse_, reverse_config_);
+}
+
+// OnStreamEvent replays an input signal and verifies the output.
+void DebugDumpReplayer::OnStreamEvent(const audioproc::Stream& msg) {
+ // APM should have been created.
+ RTC_CHECK(apm_.get()); + + if (msg.has_applied_input_volume()) { + apm_->set_stream_analog_level(msg.applied_input_volume()); + } + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->set_stream_delay_ms(msg.delay())); + + if (msg.has_keypress()) { + apm_->set_stream_key_pressed(msg.keypress()); + } else { + apm_->set_stream_key_pressed(true); + } + + RTC_CHECK_EQ(input_config_.num_channels(), + static_cast(msg.input_channel_size())); + RTC_CHECK_EQ(input_config_.num_frames() * sizeof(float), + msg.input_channel(0).size()); + + for (int i = 0; i < msg.input_channel_size(); ++i) { + memcpy(input_->channels()[i], msg.input_channel(i).data(), + msg.input_channel(i).size()); + } + + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->ProcessStream(input_->channels(), input_config_, + output_config_, output_->channels())); +} + +void DebugDumpReplayer::OnReverseStreamEvent( + const audioproc::ReverseStream& msg) { + // APM should have been created. + RTC_CHECK(apm_.get()); + + RTC_CHECK_GT(msg.channel_size(), 0); + RTC_CHECK_EQ(reverse_config_.num_channels(), + static_cast(msg.channel_size())); + RTC_CHECK_EQ(reverse_config_.num_frames() * sizeof(float), + msg.channel(0).size()); + + for (int i = 0; i < msg.channel_size(); ++i) { + memcpy(reverse_->channels()[i], msg.channel(i).data(), + msg.channel(i).size()); + } + + RTC_CHECK_EQ( + AudioProcessing::kNoError, + apm_->ProcessReverseStream(reverse_->channels(), reverse_config_, + reverse_config_, reverse_->channels())); +} + +void DebugDumpReplayer::OnConfigEvent(const audioproc::Config& msg) { + MaybeRecreateApm(msg); + ConfigureApm(msg); +} + +void DebugDumpReplayer::OnRuntimeSettingEvent( + const audioproc::RuntimeSetting& msg) { + RTC_CHECK(apm_.get()); + ReplayRuntimeSetting(apm_.get(), msg); +} + +void DebugDumpReplayer::MaybeRecreateApm(const audioproc::Config& msg) { + // These configurations cannot be changed on the fly. + RTC_CHECK(msg.has_aec_delay_agnostic_enabled()); + RTC_CHECK(msg.has_aec_extended_filter_enabled()); + + // We only create APM once, since changes on these fields should not + // happen in current implementation. + if (!apm_.get()) { + apm_ = AudioProcessingBuilderForTesting().Create(); + } +} + +void DebugDumpReplayer::ConfigureApm(const audioproc::Config& msg) { + AudioProcessing::Config apm_config; + + // AEC2/AECM configs. + RTC_CHECK(msg.has_aec_enabled()); + RTC_CHECK(msg.has_aecm_enabled()); + apm_config.echo_canceller.enabled = msg.aec_enabled() || msg.aecm_enabled(); + apm_config.echo_canceller.mobile_mode = msg.aecm_enabled(); + + // HPF configs. + RTC_CHECK(msg.has_hpf_enabled()); + apm_config.high_pass_filter.enabled = msg.hpf_enabled(); + + // Preamp configs. + RTC_CHECK(msg.has_pre_amplifier_enabled()); + apm_config.pre_amplifier.enabled = msg.pre_amplifier_enabled(); + apm_config.pre_amplifier.fixed_gain_factor = + msg.pre_amplifier_fixed_gain_factor(); + + // NS configs. + RTC_CHECK(msg.has_ns_enabled()); + RTC_CHECK(msg.has_ns_level()); + apm_config.noise_suppression.enabled = msg.ns_enabled(); + apm_config.noise_suppression.level = + static_cast( + msg.ns_level()); + + // TS configs. + RTC_CHECK(msg.has_transient_suppression_enabled()); + apm_config.transient_suppression.enabled = + msg.transient_suppression_enabled(); + + // AGC configs. 
+ RTC_CHECK(msg.has_agc_enabled()); + RTC_CHECK(msg.has_agc_mode()); + RTC_CHECK(msg.has_agc_limiter_enabled()); + apm_config.gain_controller1.enabled = msg.agc_enabled(); + apm_config.gain_controller1.mode = + static_cast( + msg.agc_mode()); + apm_config.gain_controller1.enable_limiter = msg.agc_limiter_enabled(); + RTC_CHECK(msg.has_noise_robust_agc_enabled()); + apm_config.gain_controller1.analog_gain_controller.enabled = + msg.noise_robust_agc_enabled(); + + apm_->ApplyConfig(apm_config); +} + +void DebugDumpReplayer::LoadNextMessage() { + has_next_event_ = + debug_file_ && ReadMessageFromFile(debug_file_, &next_event_); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.h b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.h new file mode 100644 index 0000000000..be21c68663 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_replayer.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_DEBUG_DUMP_REPLAYER_H_ +#define MODULES_AUDIO_PROCESSING_TEST_DEBUG_DUMP_REPLAYER_H_ + +#include + +#include "absl/strings/string_view.h" +#include "common_audio/channel_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/ignore_wundef.h" + +RTC_PUSH_IGNORING_WUNDEF() +#include "modules/audio_processing/debug.pb.h" +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { +namespace test { + +class DebugDumpReplayer { + public: + DebugDumpReplayer(); + ~DebugDumpReplayer(); + + // Set dump file + bool SetDumpFile(absl::string_view filename); + + // Return next event. + absl::optional GetNextEvent() const; + + // Run the next event. Returns true if succeeded. + bool RunNextEvent(); + + const ChannelBuffer* GetOutput() const; + StreamConfig GetOutputConfig() const; + + private: + // Following functions are facilities for replaying debug dumps. + void OnInitEvent(const audioproc::Init& msg); + void OnStreamEvent(const audioproc::Stream& msg); + void OnReverseStreamEvent(const audioproc::ReverseStream& msg); + void OnConfigEvent(const audioproc::Config& msg); + void OnRuntimeSettingEvent(const audioproc::RuntimeSetting& msg); + + void MaybeRecreateApm(const audioproc::Config& msg); + void ConfigureApm(const audioproc::Config& msg); + + void LoadNextMessage(); + + // Buffer for APM input/output. 
+ std::unique_ptr> input_; + std::unique_ptr> reverse_; + std::unique_ptr> output_; + + rtc::scoped_refptr apm_; + + FILE* debug_file_; + + StreamConfig input_config_; + StreamConfig reverse_config_; + StreamConfig output_config_; + + bool has_next_event_; + audioproc::Event next_event_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_DEBUG_DUMP_REPLAYER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/debug_dump_test.cc b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_test.cc new file mode 100644 index 0000000000..cded5de217 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/debug_dump_test.cc @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include // size_t + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "api/audio/echo_canceller3_factory.h" +#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h" +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "modules/audio_processing/test/audio_processing_builder_for_testing.h" +#include "modules/audio_processing/test/debug_dump_replayer.h" +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/task_queue_for_test.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace test { + +namespace { + +void MaybeResetBuffer(std::unique_ptr>* buffer, + const StreamConfig& config) { + auto& buffer_ref = *buffer; + if (!buffer_ref.get() || buffer_ref->num_frames() != config.num_frames() || + buffer_ref->num_channels() != config.num_channels()) { + buffer_ref.reset( + new ChannelBuffer(config.num_frames(), config.num_channels())); + } +} + +class DebugDumpGenerator { + public: + DebugDumpGenerator(absl::string_view input_file_name, + int input_rate_hz, + int input_channels, + absl::string_view reverse_file_name, + int reverse_rate_hz, + int reverse_channels, + absl::string_view dump_file_name, + bool enable_pre_amplifier); + + // Constructor that uses default input files. + explicit DebugDumpGenerator(const AudioProcessing::Config& apm_config); + + ~DebugDumpGenerator(); + + // Changes the sample rate of the input audio to the APM. + void SetInputRate(int rate_hz); + + // Sets if converts stereo input signal to mono by discarding other channels. + void ForceInputMono(bool mono); + + // Changes the sample rate of the reverse audio to the APM. + void SetReverseRate(int rate_hz); + + // Sets if converts stereo reverse signal to mono by discarding other + // channels. + void ForceReverseMono(bool mono); + + // Sets the required sample rate of the APM output. + void SetOutputRate(int rate_hz); + + // Sets the required channels of the APM output. 
+ void SetOutputChannels(int channels); + + std::string dump_file_name() const { return dump_file_name_; } + + void StartRecording(); + void Process(size_t num_blocks); + void StopRecording(); + AudioProcessing* apm() const { return apm_.get(); } + + private: + static void ReadAndDeinterleave(ResampleInputAudioFile* audio, + int channels, + const StreamConfig& config, + float* const* buffer); + + // APM input/output settings. + StreamConfig input_config_; + StreamConfig reverse_config_; + StreamConfig output_config_; + + // Input file format. + const std::string input_file_name_; + ResampleInputAudioFile input_audio_; + const int input_file_channels_; + + // Reverse file format. + const std::string reverse_file_name_; + ResampleInputAudioFile reverse_audio_; + const int reverse_file_channels_; + + // Buffer for APM input/output. + std::unique_ptr> input_; + std::unique_ptr> reverse_; + std::unique_ptr> output_; + + bool enable_pre_amplifier_; + + TaskQueueForTest worker_queue_; + rtc::scoped_refptr apm_; + + const std::string dump_file_name_; +}; + +DebugDumpGenerator::DebugDumpGenerator(absl::string_view input_file_name, + int input_rate_hz, + int input_channels, + absl::string_view reverse_file_name, + int reverse_rate_hz, + int reverse_channels, + absl::string_view dump_file_name, + bool enable_pre_amplifier) + : input_config_(input_rate_hz, input_channels), + reverse_config_(reverse_rate_hz, reverse_channels), + output_config_(input_rate_hz, input_channels), + input_audio_(input_file_name, input_rate_hz, input_rate_hz), + input_file_channels_(input_channels), + reverse_audio_(reverse_file_name, reverse_rate_hz, reverse_rate_hz), + reverse_file_channels_(reverse_channels), + input_(new ChannelBuffer(input_config_.num_frames(), + input_config_.num_channels())), + reverse_(new ChannelBuffer(reverse_config_.num_frames(), + reverse_config_.num_channels())), + output_(new ChannelBuffer(output_config_.num_frames(), + output_config_.num_channels())), + enable_pre_amplifier_(enable_pre_amplifier), + worker_queue_("debug_dump_generator_worker_queue"), + dump_file_name_(dump_file_name) { + AudioProcessingBuilderForTesting apm_builder; + apm_ = apm_builder.Create(); +} + +DebugDumpGenerator::DebugDumpGenerator( + const AudioProcessing::Config& apm_config) + : DebugDumpGenerator(ResourcePath("near32_stereo", "pcm"), + 32000, + 2, + ResourcePath("far32_stereo", "pcm"), + 32000, + 2, + TempFilename(OutputPath(), "debug_aec"), + apm_config.pre_amplifier.enabled) { + apm_->ApplyConfig(apm_config); +} + +DebugDumpGenerator::~DebugDumpGenerator() { + remove(dump_file_name_.c_str()); +} + +void DebugDumpGenerator::SetInputRate(int rate_hz) { + input_audio_.set_output_rate_hz(rate_hz); + input_config_.set_sample_rate_hz(rate_hz); + MaybeResetBuffer(&input_, input_config_); +} + +void DebugDumpGenerator::ForceInputMono(bool mono) { + const int channels = mono ? 1 : input_file_channels_; + input_config_.set_num_channels(channels); + MaybeResetBuffer(&input_, input_config_); +} + +void DebugDumpGenerator::SetReverseRate(int rate_hz) { + reverse_audio_.set_output_rate_hz(rate_hz); + reverse_config_.set_sample_rate_hz(rate_hz); + MaybeResetBuffer(&reverse_, reverse_config_); +} + +void DebugDumpGenerator::ForceReverseMono(bool mono) { + const int channels = mono ? 
1 : reverse_file_channels_; + reverse_config_.set_num_channels(channels); + MaybeResetBuffer(&reverse_, reverse_config_); +} + +void DebugDumpGenerator::SetOutputRate(int rate_hz) { + output_config_.set_sample_rate_hz(rate_hz); + MaybeResetBuffer(&output_, output_config_); +} + +void DebugDumpGenerator::SetOutputChannels(int channels) { + output_config_.set_num_channels(channels); + MaybeResetBuffer(&output_, output_config_); +} + +void DebugDumpGenerator::StartRecording() { + apm_->AttachAecDump( + AecDumpFactory::Create(dump_file_name_.c_str(), -1, &worker_queue_)); +} + +void DebugDumpGenerator::Process(size_t num_blocks) { + for (size_t i = 0; i < num_blocks; ++i) { + ReadAndDeinterleave(&reverse_audio_, reverse_file_channels_, + reverse_config_, reverse_->channels()); + ReadAndDeinterleave(&input_audio_, input_file_channels_, input_config_, + input_->channels()); + RTC_CHECK_EQ(AudioProcessing::kNoError, apm_->set_stream_delay_ms(100)); + apm_->set_stream_analog_level(100); + if (enable_pre_amplifier_) { + apm_->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreateCapturePreGain(1 + i % 10)); + } + apm_->set_stream_key_pressed(i % 10 == 9); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->ProcessStream(input_->channels(), input_config_, + output_config_, output_->channels())); + + RTC_CHECK_EQ( + AudioProcessing::kNoError, + apm_->ProcessReverseStream(reverse_->channels(), reverse_config_, + reverse_config_, reverse_->channels())); + } +} + +void DebugDumpGenerator::StopRecording() { + apm_->DetachAecDump(); +} + +void DebugDumpGenerator::ReadAndDeinterleave(ResampleInputAudioFile* audio, + int channels, + const StreamConfig& config, + float* const* buffer) { + const size_t num_frames = config.num_frames(); + const int out_channels = config.num_channels(); + + std::vector signal(channels * num_frames); + + audio->Read(num_frames * channels, &signal[0]); + + // We only allow reducing number of channels by discarding some channels. + RTC_CHECK_LE(out_channels, channels); + for (int channel = 0; channel < out_channels; ++channel) { + for (size_t i = 0; i < num_frames; ++i) { + buffer[channel][i] = S16ToFloat(signal[i * channels + channel]); + } + } +} + +} // namespace + +class DebugDumpTest : public ::testing::Test { + public: + // VerifyDebugDump replays a debug dump using APM and verifies that the result + // is bit-exact-identical to the output channel in the dump. This is only + // guaranteed if the debug dump is started on the first frame. + void VerifyDebugDump(absl::string_view in_filename); + + private: + DebugDumpReplayer debug_dump_replayer_; +}; + +void DebugDumpTest::VerifyDebugDump(absl::string_view in_filename) { + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(in_filename)); + + while (const absl::optional event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::STREAM) { + const audioproc::Stream* msg = &event->stream(); + const StreamConfig output_config = debug_dump_replayer_.GetOutputConfig(); + const ChannelBuffer* output = debug_dump_replayer_.GetOutput(); + // Check that output of APM is bit-exact to the output in the dump. 
+ ASSERT_EQ(output_config.num_channels(),
+ static_cast<size_t>(msg->output_channel_size()));
+ ASSERT_EQ(output_config.num_frames() * sizeof(float),
+ msg->output_channel(0).size());
+ for (int i = 0; i < msg->output_channel_size(); ++i) {
+ ASSERT_EQ(0,
+ memcmp(output->channels()[i], msg->output_channel(i).data(),
+ msg->output_channel(i).size()));
+ }
+ }
+ }
+}
+
+TEST_F(DebugDumpTest, SimpleCase) {
+ DebugDumpGenerator generator(/*apm_config=*/{});
+ generator.StartRecording();
+ generator.Process(100);
+ generator.StopRecording();
+ VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, ChangeInputFormat) {
+ DebugDumpGenerator generator(/*apm_config=*/{});
+
+ generator.StartRecording();
+ generator.Process(100);
+ generator.SetInputRate(48000);
+
+ generator.ForceInputMono(true);
+ // The number of output channels should not be larger than the number of
+ // input channels; APM will fail otherwise.
+ generator.SetOutputChannels(1);
+
+ generator.Process(100);
+ generator.StopRecording();
+ VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, ChangeReverseFormat) {
+ DebugDumpGenerator generator(/*apm_config=*/{});
+ generator.StartRecording();
+ generator.Process(100);
+ generator.SetReverseRate(48000);
+ generator.ForceReverseMono(true);
+ generator.Process(100);
+ generator.StopRecording();
+ VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, ChangeOutputFormat) {
+ DebugDumpGenerator generator(/*apm_config=*/{});
+ generator.StartRecording();
+ generator.Process(100);
+ generator.SetOutputRate(48000);
+ generator.SetOutputChannels(1);
+ generator.Process(100);
+ generator.StopRecording();
+ VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, ToggleAec) {
+ AudioProcessing::Config apm_config;
+ apm_config.echo_canceller.enabled = true;
+ DebugDumpGenerator generator(apm_config);
+ generator.StartRecording();
+ generator.Process(100);
+
+ apm_config.echo_canceller.enabled = false;
+ generator.apm()->ApplyConfig(apm_config);
+
+ generator.Process(100);
+ generator.StopRecording();
+ VerifyDebugDump(generator.dump_file_name());
+}
+
+TEST_F(DebugDumpTest, VerifyCombinedExperimentalStringInclusive) {
+ AudioProcessing::Config apm_config;
+ apm_config.echo_canceller.enabled = true;
+ apm_config.gain_controller1.analog_gain_controller.enabled = true;
+ apm_config.gain_controller1.analog_gain_controller.startup_min_volume = 0;
+ DebugDumpGenerator generator(apm_config);
+ generator.StartRecording();
+ generator.Process(100);
+ generator.StopRecording();
+
+ DebugDumpReplayer debug_dump_replayer_;
+
+ ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name()));
+
+ while (const absl::optional<audioproc::Event> event =
+ debug_dump_replayer_.GetNextEvent()) {
+ debug_dump_replayer_.RunNextEvent();
+ if (event->type() == audioproc::Event::CONFIG) {
+ const audioproc::Config* msg = &event->config();
+ ASSERT_TRUE(msg->has_experiments_description());
+ EXPECT_PRED_FORMAT2(::testing::IsSubstring, "EchoController",
+ msg->experiments_description().c_str());
+ }
+ }
+}
+
+TEST_F(DebugDumpTest, VerifyCombinedExperimentalStringExclusive) {
+ AudioProcessing::Config apm_config;
+ apm_config.echo_canceller.enabled = true;
+ DebugDumpGenerator generator(apm_config);
+ generator.StartRecording();
+ generator.Process(100);
+ generator.StopRecording();
+
+ DebugDumpReplayer debug_dump_replayer_;
+
+ ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name()));
+
+ while (const absl::optional<audioproc::Event> event =
+ debug_dump_replayer_.GetNextEvent()) {
+ debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_PRED_FORMAT2(::testing::IsNotSubstring, + "AgcClippingLevelExperiment", + msg->experiments_description().c_str()); + } + } +} + +TEST_F(DebugDumpTest, VerifyAec3ExperimentalString) { + AudioProcessing::Config apm_config; + apm_config.echo_canceller.enabled = true; + DebugDumpGenerator generator(apm_config); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const absl::optional event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_PRED_FORMAT2(::testing::IsSubstring, "EchoController", + msg->experiments_description().c_str()); + } + } +} + +TEST_F(DebugDumpTest, VerifyEmptyExperimentalString) { + DebugDumpGenerator generator(/*apm_config=*/{}); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const absl::optional event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_EQ(0u, msg->experiments_description().size()); + } + } +} + +// AGC is not supported on Android or iOS. 
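+// The MAYBE_ indirection below is the usual gtest idiom: on those platforms
+// the test is registered under the DISABLED_ prefix, so the runner skips it.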
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) +#define MAYBE_ToggleAgc DISABLED_ToggleAgc +#else +#define MAYBE_ToggleAgc ToggleAgc +#endif +TEST_F(DebugDumpTest, MAYBE_ToggleAgc) { + DebugDumpGenerator generator(/*apm_config=*/{}); + generator.StartRecording(); + generator.Process(100); + + AudioProcessing::Config apm_config = generator.apm()->GetConfig(); + apm_config.gain_controller1.enabled = !apm_config.gain_controller1.enabled; + generator.apm()->ApplyConfig(apm_config); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ToggleNs) { + DebugDumpGenerator generator(/*apm_config=*/{}); + generator.StartRecording(); + generator.Process(100); + + AudioProcessing::Config apm_config = generator.apm()->GetConfig(); + apm_config.noise_suppression.enabled = !apm_config.noise_suppression.enabled; + generator.apm()->ApplyConfig(apm_config); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, TransientSuppressionOn) { + DebugDumpGenerator generator(/*apm_config=*/{}); + + AudioProcessing::Config apm_config = generator.apm()->GetConfig(); + apm_config.transient_suppression.enabled = true; + generator.apm()->ApplyConfig(apm_config); + + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, PreAmplifierIsOn) { + AudioProcessing::Config apm_config; + apm_config.pre_amplifier.enabled = true; + DebugDumpGenerator generator(apm_config); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.cc b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.cc new file mode 100644 index 0000000000..1d36b954f9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/test/echo_canceller_test_tools.h" + +#include "rtc_base/checks.h" + +namespace webrtc { + +void RandomizeSampleVector(Random* random_generator, rtc::ArrayView v) { + RandomizeSampleVector(random_generator, v, + /*amplitude=*/32767.f); +} + +void RandomizeSampleVector(Random* random_generator, + rtc::ArrayView v, + float amplitude) { + for (auto& v_k : v) { + v_k = 2 * amplitude * random_generator->Rand() - amplitude; + } +} + +template +void DelayBuffer::Delay(rtc::ArrayView x, + rtc::ArrayView x_delayed) { + RTC_DCHECK_EQ(x.size(), x_delayed.size()); + if (buffer_.empty()) { + std::copy(x.begin(), x.end(), x_delayed.begin()); + } else { + for (size_t k = 0; k < x.size(); ++k) { + x_delayed[k] = buffer_[next_insert_index_]; + buffer_[next_insert_index_] = x[k]; + next_insert_index_ = (next_insert_index_ + 1) % buffer_.size(); + } + } +} + +template class DelayBuffer; +template class DelayBuffer; +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.h b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.h new file mode 100644 index 0000000000..0d70cd39c6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_ECHO_CANCELLER_TEST_TOOLS_H_ +#define MODULES_AUDIO_PROCESSING_TEST_ECHO_CANCELLER_TEST_TOOLS_H_ + +#include +#include + +#include "api/array_view.h" +#include "rtc_base/random.h" + +namespace webrtc { + +// Randomizes the elements in a vector with values -32767.f:32767.f. +void RandomizeSampleVector(Random* random_generator, rtc::ArrayView v); + +// Randomizes the elements in a vector with values -amplitude:amplitude. +void RandomizeSampleVector(Random* random_generator, + rtc::ArrayView v, + float amplitude); + +// Class for delaying a signal a fixed number of samples. +template +class DelayBuffer { + public: + explicit DelayBuffer(size_t delay) : buffer_(delay) {} + ~DelayBuffer() = default; + + // Produces a delayed signal copy of x. + void Delay(rtc::ArrayView x, rtc::ArrayView x_delayed); + + private: + std::vector buffer_; + size_t next_insert_index_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_ECHO_CANCELLER_TEST_TOOLS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc new file mode 100644 index 0000000000..164d28fa16 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/test/echo_canceller_test_tools.h" + +#include + +#include "api/array_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/random.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(EchoCancellerTestTools, FloatDelayBuffer) { + constexpr size_t kDelay = 10; + DelayBuffer delay_buffer(kDelay); + std::vector v(1000, 0.f); + for (size_t k = 0; k < v.size(); ++k) { + v[k] = k; + } + std::vector v_delayed = v; + constexpr size_t kBlockSize = 50; + for (size_t k = 0; k < rtc::CheckedDivExact(v.size(), kBlockSize); ++k) { + delay_buffer.Delay( + rtc::ArrayView(&v[k * kBlockSize], kBlockSize), + rtc::ArrayView(&v_delayed[k * kBlockSize], kBlockSize)); + } + for (size_t k = kDelay; k < v.size(); ++k) { + EXPECT_EQ(v[k - kDelay], v_delayed[k]); + } +} + +TEST(EchoCancellerTestTools, IntDelayBuffer) { + constexpr size_t kDelay = 10; + DelayBuffer delay_buffer(kDelay); + std::vector v(1000, 0); + for (size_t k = 0; k < v.size(); ++k) { + v[k] = k; + } + std::vector v_delayed = v; + const size_t kBlockSize = 50; + for (size_t k = 0; k < rtc::CheckedDivExact(v.size(), kBlockSize); ++k) { + delay_buffer.Delay( + rtc::ArrayView(&v[k * kBlockSize], kBlockSize), + rtc::ArrayView(&v_delayed[k * kBlockSize], kBlockSize)); + } + for (size_t k = kDelay; k < v.size(); ++k) { + EXPECT_EQ(v[k - kDelay], v_delayed[k]); + } +} + +TEST(EchoCancellerTestTools, RandomizeSampleVector) { + Random random_generator(42U); + std::vector v(50, 0.f); + std::vector v_ref = v; + RandomizeSampleVector(&random_generator, v); + EXPECT_NE(v, v_ref); + v_ref = v; + RandomizeSampleVector(&random_generator, v); + EXPECT_NE(v, v_ref); +} + +TEST(EchoCancellerTestTools, RandomizeSampleVectorWithAmplitude) { + Random random_generator(42U); + std::vector v(50, 0.f); + RandomizeSampleVector(&random_generator, v, 1000.f); + EXPECT_GE(1000.f, *std::max_element(v.begin(), v.end())); + EXPECT_LE(-1000.f, *std::min_element(v.begin(), v.end())); + RandomizeSampleVector(&random_generator, v, 100.f); + EXPECT_GE(100.f, *std::max_element(v.begin(), v.end())); + EXPECT_LE(-100.f, *std::min_element(v.begin(), v.end())); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/echo_control_mock.h b/third_party/libwebrtc/modules/audio_processing/test/echo_control_mock.h new file mode 100644 index 0000000000..763d6e4f0b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/echo_control_mock.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_ECHO_CONTROL_MOCK_H_ +#define MODULES_AUDIO_PROCESSING_TEST_ECHO_CONTROL_MOCK_H_ + +#include "api/audio/echo_control.h" +#include "test/gmock.h" + +namespace webrtc { + +class AudioBuffer; + +class MockEchoControl : public EchoControl { + public: + MOCK_METHOD(void, AnalyzeRender, (AudioBuffer * render), (override)); + MOCK_METHOD(void, AnalyzeCapture, (AudioBuffer * capture), (override)); + MOCK_METHOD(void, + ProcessCapture, + (AudioBuffer * capture, bool echo_path_change), + (override)); + MOCK_METHOD(void, + ProcessCapture, + (AudioBuffer * capture, + AudioBuffer* linear_output, + bool echo_path_change), + (override)); + MOCK_METHOD(EchoControl::Metrics, GetMetrics, (), (const, override)); + MOCK_METHOD(void, SetAudioBufferDelay, (int delay_ms), (override)); + MOCK_METHOD(void, + SetCaptureOutputUsage, + (bool capture_output_used), + (override)); + MOCK_METHOD(bool, ActiveProcessing, (), (const, override)); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_ECHO_CONTROL_MOCK_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.cc b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.cc new file mode 100644 index 0000000000..3fd80b2ede --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.cc @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/fake_recording_device.h" + +#include +#include + +#include "absl/types/optional.h" +#include "modules/audio_processing/agc2/gain_map_internal.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace test { + +namespace { + +constexpr float kFloatSampleMin = -32768.f; +constexpr float kFloatSampleMax = 32767.0f; + +} // namespace + +// Abstract class for the different fake recording devices. +class FakeRecordingDeviceWorker { + public: + explicit FakeRecordingDeviceWorker(const int initial_mic_level) + : mic_level_(initial_mic_level) {} + int mic_level() const { return mic_level_; } + void set_mic_level(const int level) { mic_level_ = level; } + void set_undo_mic_level(const int level) { undo_mic_level_ = level; } + virtual ~FakeRecordingDeviceWorker() = default; + virtual void ModifyBufferInt16(rtc::ArrayView buffer) = 0; + virtual void ModifyBufferFloat(ChannelBuffer* buffer) = 0; + + protected: + // Mic level to simulate. + int mic_level_; + // Optional mic level to undo. + absl::optional undo_mic_level_; +}; + +namespace { + +// Identity fake recording device. The samples are not modified, which is +// equivalent to a constant gain curve at 1.0 - only used for testing. 
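+// (Note: this worker is the one selected by FakeRecordingDevice further below
+// when device_kind == 0, so SimulateAnalogGain() then leaves the audio
+// untouched regardless of the mic level.)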
+class FakeRecordingDeviceIdentity final : public FakeRecordingDeviceWorker {
+ public:
+  explicit FakeRecordingDeviceIdentity(const int initial_mic_level)
+      : FakeRecordingDeviceWorker(initial_mic_level) {}
+  ~FakeRecordingDeviceIdentity() override = default;
+  void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) override {}
+  void ModifyBufferFloat(ChannelBuffer<float>* buffer) override {}
+};
+
+// Linear fake recording device. The gain curve is a linear function mapping
+// the mic levels range [0, 255] to [0.0, 1.0].
+class FakeRecordingDeviceLinear final : public FakeRecordingDeviceWorker {
+ public:
+  explicit FakeRecordingDeviceLinear(const int initial_mic_level)
+      : FakeRecordingDeviceWorker(initial_mic_level) {}
+  ~FakeRecordingDeviceLinear() override = default;
+  void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) override {
+    const size_t number_of_samples = buffer.size();
+    int16_t* data = buffer.data();
+    // If an undo level is specified, virtually restore the unmodified
+    // microphone level; otherwise simulate the mic gain only.
+    const float divisor =
+        (undo_mic_level_ && *undo_mic_level_ > 0) ? *undo_mic_level_ : 255.f;
+    for (size_t i = 0; i < number_of_samples; ++i) {
+      data[i] = rtc::saturated_cast<int16_t>(data[i] * mic_level_ / divisor);
+    }
+  }
+  void ModifyBufferFloat(ChannelBuffer<float>* buffer) override {
+    // If an undo level is specified, virtually restore the unmodified
+    // microphone level; otherwise simulate the mic gain only.
+    const float divisor =
+        (undo_mic_level_ && *undo_mic_level_ > 0) ? *undo_mic_level_ : 255.f;
+    for (size_t c = 0; c < buffer->num_channels(); ++c) {
+      for (size_t i = 0; i < buffer->num_frames(); ++i) {
+        buffer->channels()[c][i] =
+            rtc::SafeClamp(buffer->channels()[c][i] * mic_level_ / divisor,
+                           kFloatSampleMin, kFloatSampleMax);
+      }
+    }
+  }
+};
+
+// Converts a gain expressed in dB into the corresponding linear amplitude
+// ratio. Used by ComputeAgcLinearFactor() below.
+float DbToRatio(float db) {
+  return std::pow(10.f, db / 20.f);
+}
+
+float ComputeAgcLinearFactor(const absl::optional<int>& undo_mic_level,
+                             int mic_level) {
+  // If an undo level is specified, virtually restore the unmodified
+  // microphone level; otherwise simulate the mic gain only.
+  const int undo_level =
+      (undo_mic_level && *undo_mic_level > 0) ? *undo_mic_level : 100;
+  return DbToRatio(kGainMap[mic_level] - kGainMap[undo_level]);
+}
+
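The AGC-style device defined next therefore reduces to a table lookup plus a dB-to-linear conversion. A minimal Python sketch of the same arithmetic, for illustration only - the table values below are invented, and the real 256-entry `kGainMap` lives in `modules/audio_processing/agc2/gain_map_internal.h`:

```python
# Sketch of ComputeAgcLinearFactor(); GAIN_MAP_DB is a made-up stand-in
# for kGainMap (mic level -> gain in dB).
GAIN_MAP_DB = {0: -12, 100: 0, 170: 8, 200: 12, 255: 18}

def compute_agc_linear_factor(undo_mic_level, mic_level):
    # Default to level 100 when no valid undo level is given, as in the C++.
    undo_level = undo_mic_level if undo_mic_level and undo_mic_level > 0 else 100
    gain_db = GAIN_MAP_DB[mic_level] - GAIN_MAP_DB[undo_level]
    return 10.0 ** (gain_db / 20.0)  # dB difference -> linear amplitude ratio

# e.g. simulating level 200 while undoing a real device level of 100:
# compute_agc_linear_factor(100, 200) == 10 ** (12 / 20) ~= 3.98
```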
+// Roughly dB-scale fake recording device. Valid levels are [0, 255]. The mic
+// applies a gain from kGainMap in agc2/gain_map_internal.h.
+class FakeRecordingDeviceAgc final : public FakeRecordingDeviceWorker {
+ public:
+  explicit FakeRecordingDeviceAgc(const int initial_mic_level)
+      : FakeRecordingDeviceWorker(initial_mic_level) {}
+  ~FakeRecordingDeviceAgc() override = default;
+  void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) override {
+    const float scaling_factor =
+        ComputeAgcLinearFactor(undo_mic_level_, mic_level_);
+    const size_t number_of_samples = buffer.size();
+    int16_t* data = buffer.data();
+    for (size_t i = 0; i < number_of_samples; ++i) {
+      data[i] = rtc::saturated_cast<int16_t>(data[i] * scaling_factor);
+    }
+  }
+  void ModifyBufferFloat(ChannelBuffer<float>* buffer) override {
+    const float scaling_factor =
+        ComputeAgcLinearFactor(undo_mic_level_, mic_level_);
+    for (size_t c = 0; c < buffer->num_channels(); ++c) {
+      for (size_t i = 0; i < buffer->num_frames(); ++i) {
+        buffer->channels()[c][i] =
+            rtc::SafeClamp(buffer->channels()[c][i] * scaling_factor,
+                           kFloatSampleMin, kFloatSampleMax);
+      }
+    }
+  }
+};
+
+}  // namespace
+
+FakeRecordingDevice::FakeRecordingDevice(int initial_mic_level,
+                                         int device_kind) {
+  switch (device_kind) {
+    case 0:
+      worker_ =
+          std::make_unique<FakeRecordingDeviceIdentity>(initial_mic_level);
+      break;
+    case 1:
+      worker_ = std::make_unique<FakeRecordingDeviceLinear>(initial_mic_level);
+      break;
+    case 2:
+      worker_ = std::make_unique<FakeRecordingDeviceAgc>(initial_mic_level);
+      break;
+    default:
+      RTC_DCHECK_NOTREACHED();
+      break;
+  }
+}
+
+FakeRecordingDevice::~FakeRecordingDevice() = default;
+
+int FakeRecordingDevice::MicLevel() const {
+  RTC_CHECK(worker_);
+  return worker_->mic_level();
+}
+
+void FakeRecordingDevice::SetMicLevel(const int level) {
+  RTC_CHECK(worker_);
+  if (level != worker_->mic_level())
+    RTC_LOG(LS_INFO) << "Simulate mic level update: " << level;
+  worker_->set_mic_level(level);
+}
+
+void FakeRecordingDevice::SetUndoMicLevel(const int level) {
+  RTC_DCHECK(worker_);
+  // TODO(alessiob): The behavior with undo level equal to zero is not clear
+  // yet and will be defined in future CLs once more FakeRecordingDeviceWorker
+  // implementations need to be added.
+  RTC_CHECK(level > 0) << "Zero undo mic level is unsupported";
+  worker_->set_undo_mic_level(level);
+}
+
+void FakeRecordingDevice::SimulateAnalogGain(rtc::ArrayView<int16_t> buffer) {
+  RTC_DCHECK(worker_);
+  worker_->ModifyBufferInt16(buffer);
+}
+
+void FakeRecordingDevice::SimulateAnalogGain(ChannelBuffer<float>* buffer) {
+  RTC_DCHECK(worker_);
+  worker_->ModifyBufferFloat(buffer);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.h b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.h
new file mode 100644
index 0000000000..da3c0cf794
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_FAKE_RECORDING_DEVICE_H_
+#define MODULES_AUDIO_PROCESSING_TEST_FAKE_RECORDING_DEVICE_H_
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "common_audio/channel_buffer.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+class FakeRecordingDeviceWorker;
+
+// Class for simulating a microphone with analog gain.
+//
+// The intended modes of operation are the following:
+//
+// FakeRecordingDevice fake_mic(255, 1);
+//
+// fake_mic.SetMicLevel(170);
+// fake_mic.SimulateAnalogGain(buffer);
+//
+// When the mic level to undo is known:
+//
+// fake_mic.SetMicLevel(170);
+// fake_mic.SetUndoMicLevel(30);
+// fake_mic.SimulateAnalogGain(buffer);
+//
+// The second option virtually restores the unmodified microphone level.
+// Calling SimulateAnalogGain() will first "undo" the gain applied by the real
+// microphone (e.g., 30).
+class FakeRecordingDevice final {
+ public:
+  FakeRecordingDevice(int initial_mic_level, int device_kind);
+  ~FakeRecordingDevice();
+
+  int MicLevel() const;
+  void SetMicLevel(int level);
+  void SetUndoMicLevel(int level);
+
+  // Simulates the analog gain. If an undo mic level has been set via
+  // SetUndoMicLevel(), the unmodified mic signal is virtually restored before
+  // the simulated gain is applied.
+  void SimulateAnalogGain(rtc::ArrayView<int16_t> buffer);
+
+  // Simulates the analog gain. If an undo mic level has been set via
+  // SetUndoMicLevel(), the unmodified mic signal is virtually restored before
+  // the simulated gain is applied.
+  void SimulateAnalogGain(ChannelBuffer<float>* buffer);
+
+ private:
+  // Fake recording device worker.
+  std::unique_ptr<FakeRecordingDeviceWorker> worker_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_FAKE_RECORDING_DEVICE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device_unittest.cc b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device_unittest.cc
new file mode 100644
index 0000000000..2ac8b1dc48
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/fake_recording_device_unittest.cc
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/fake_recording_device.h"
+
+#include <cmath>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "api/array_view.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+constexpr int kInitialMicLevel = 100;
+
+// TODO(alessiob): Add new fake recording device kind values here as they are
+// added in FakeRecordingDevice::FakeRecordingDevice.
+const std::vector<int> kFakeRecDeviceKinds = {0, 1, 2};
+
+const std::vector<std::vector<float>> kTestMultiChannelSamples{
+    std::vector<float>{-10.f, -1.f, -0.1f, 0.f, 0.1f, 1.f, 10.f}};
+
+// Writes samples into ChannelBuffer<float>.
+void WritesDataIntoChannelBuffer(const std::vector<std::vector<float>>& data,
+                                 ChannelBuffer<float>* buff) {
+  EXPECT_EQ(data.size(), buff->num_channels());
+  EXPECT_EQ(data[0].size(), buff->num_frames());
+  for (size_t c = 0; c < buff->num_channels(); ++c) {
+    for (size_t f = 0; f < buff->num_frames(); ++f) {
+      buff->channels()[c][f] = data[c][f];
+    }
+  }
+}
+
+std::unique_ptr<ChannelBuffer<float>> CreateChannelBufferWithData(
+    const std::vector<std::vector<float>>& data) {
+  auto buff =
+      std::make_unique<ChannelBuffer<float>>(data[0].size(), data.size());
+  WritesDataIntoChannelBuffer(data, buff.get());
+  return buff;
+}
+
+// Checks that the samples modified using monotonic level values are also
+// monotonic.
+void CheckIfMonotoneSamplesModules(const ChannelBuffer<float>* prev,
+                                   const ChannelBuffer<float>* curr) {
+  RTC_DCHECK_EQ(prev->num_channels(), curr->num_channels());
+  RTC_DCHECK_EQ(prev->num_frames(), curr->num_frames());
+  bool valid = true;
+  for (size_t i = 0; i < prev->num_channels(); ++i) {
+    for (size_t j = 0; j < prev->num_frames(); ++j) {
+      valid = std::fabs(prev->channels()[i][j]) <=
+              std::fabs(curr->channels()[i][j]);
+      if (!valid) {
+        break;
+      }
+    }
+    if (!valid) {
+      break;
+    }
+  }
+  EXPECT_TRUE(valid);
+}
+
+// Checks that the samples in each pair have the same sign unless the sample
+// in `dst` is zero (because of zero gain).
+void CheckSameSign(const ChannelBuffer<float>* src,
+                   const ChannelBuffer<float>* dst) {
+  RTC_DCHECK_EQ(src->num_channels(), dst->num_channels());
+  RTC_DCHECK_EQ(src->num_frames(), dst->num_frames());
+  const auto fsgn = [](float x) { return ((x < 0) ? -1 : (x > 0) ? 1 : 0); };
+  bool valid = true;
+  for (size_t i = 0; i < src->num_channels(); ++i) {
+    for (size_t j = 0; j < src->num_frames(); ++j) {
+      valid = dst->channels()[i][j] == 0.0f ||
+              fsgn(src->channels()[i][j]) == fsgn(dst->channels()[i][j]);
+      if (!valid) {
+        break;
+      }
+    }
+    if (!valid) {
+      break;
+    }
+  }
+  EXPECT_TRUE(valid);
+}
+
+std::string FakeRecordingDeviceKindToString(int fake_rec_device_kind) {
+  rtc::StringBuilder ss;
+  ss << "fake recording device: " << fake_rec_device_kind;
+  return ss.Release();
+}
+
+std::string AnalogLevelToString(int level) {
+  rtc::StringBuilder ss;
+  ss << "analog level: " << level;
+  return ss.Release();
+}
+
+}  // namespace
+
+TEST(FakeRecordingDevice, CheckHelperFunctions) {
+  constexpr size_t kC = 0;  // Channel index.
+  constexpr size_t kS = 1;  // Sample index.
+
+  // Check read.
+  auto buff = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  for (size_t c = 0; c < kTestMultiChannelSamples.size(); ++c) {
+    for (size_t s = 0; s < kTestMultiChannelSamples[0].size(); ++s) {
+      EXPECT_EQ(kTestMultiChannelSamples[c][s], buff->channels()[c][s]);
+    }
+  }
+
+  // Check write.
+  buff->channels()[kC][kS] = -5.0f;
+  RTC_DCHECK_NE(buff->channels()[kC][kS], kTestMultiChannelSamples[kC][kS]);
+
+  // Check reset.
+  WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff.get());
+  EXPECT_EQ(buff->channels()[kC][kS], kTestMultiChannelSamples[kC][kS]);
+}
+
+// Implicitly checks that changes to the mic and undo levels are visible to the
+// FakeRecordingDeviceWorker implementation injected in FakeRecordingDevice.
+TEST(FakeRecordingDevice, TestWorkerAbstractClass) {
+  FakeRecordingDevice fake_recording_device(kInitialMicLevel, 1);
+
+  auto buff1 = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  fake_recording_device.SetMicLevel(100);
+  fake_recording_device.SimulateAnalogGain(buff1.get());
+
+  auto buff2 = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  fake_recording_device.SetMicLevel(200);
+  fake_recording_device.SimulateAnalogGain(buff2.get());
+
+  for (size_t c = 0; c < kTestMultiChannelSamples.size(); ++c) {
+    for (size_t s = 0; s < kTestMultiChannelSamples[0].size(); ++s) {
+      EXPECT_LE(std::abs(buff1->channels()[c][s]),
+                std::abs(buff2->channels()[c][s]));
+    }
+  }
+
+  auto buff3 = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  fake_recording_device.SetMicLevel(200);
+  fake_recording_device.SetUndoMicLevel(100);
+  fake_recording_device.SimulateAnalogGain(buff3.get());
+
+  for (size_t c = 0; c < kTestMultiChannelSamples.size(); ++c) {
+    for (size_t s = 0; s < kTestMultiChannelSamples[0].size(); ++s) {
+      EXPECT_LE(std::abs(buff1->channels()[c][s]),
+                std::abs(buff3->channels()[c][s]));
+      EXPECT_LE(std::abs(buff2->channels()[c][s]),
+                std::abs(buff3->channels()[c][s]));
+    }
+  }
+}
+
+TEST(FakeRecordingDevice, GainCurveShouldBeMonotone) {
+  // Create input-output buffers.
+  auto buff_prev = CreateChannelBufferWithData(kTestMultiChannelSamples);
+  auto buff_curr = CreateChannelBufferWithData(kTestMultiChannelSamples);
+
+  // Test different mappings.
+  for (auto fake_rec_device_kind : kFakeRecDeviceKinds) {
+    SCOPED_TRACE(FakeRecordingDeviceKindToString(fake_rec_device_kind));
+    FakeRecordingDevice fake_recording_device(kInitialMicLevel,
+                                              fake_rec_device_kind);
+    // TODO(alessiob): The test below is designed for state-less recording
+    // devices. If, for instance, a device has memory, the test might need
+    // to be redesigned (e.g., re-initialize fake recording device).
+
+    // Apply lowest analog level.
+    WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff_prev.get());
+    fake_recording_device.SetMicLevel(0);
+    fake_recording_device.SimulateAnalogGain(buff_prev.get());
+
+    // Increment analog level to check monotonicity.
+    for (int i = 1; i <= 255; ++i) {
+      SCOPED_TRACE(AnalogLevelToString(i));
+      WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff_curr.get());
+      fake_recording_device.SetMicLevel(i);
+      fake_recording_device.SimulateAnalogGain(buff_curr.get());
+      CheckIfMonotoneSamplesModules(buff_prev.get(), buff_curr.get());
+
+      // Update prev.
+      buff_prev.swap(buff_curr);
+    }
+  }
+}
+
+TEST(FakeRecordingDevice, GainCurveShouldNotChangeSign) {
+  // Create view on original samples.
+  std::unique_ptr<ChannelBuffer<float>> buff_orig =
+      CreateChannelBufferWithData(kTestMultiChannelSamples);
+
+  // Create output buffer.
+  auto buff = CreateChannelBufferWithData(kTestMultiChannelSamples);
+
+  // Test different mappings.
+  for (auto fake_rec_device_kind : kFakeRecDeviceKinds) {
+    SCOPED_TRACE(FakeRecordingDeviceKindToString(fake_rec_device_kind));
+    FakeRecordingDevice fake_recording_device(kInitialMicLevel,
+                                              fake_rec_device_kind);
+
+    // TODO(alessiob): The test below is designed for state-less recording
+    // devices. If, for instance, a device has memory, the test might need
+    // to be redesigned (e.g., re-initialize fake recording device).
+
+    for (int i = 0; i <= 255; ++i) {
+      SCOPED_TRACE(AnalogLevelToString(i));
+      WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff.get());
+      fake_recording_device.SetMicLevel(i);
+      fake_recording_device.SimulateAnalogGain(buff.get());
+      CheckSameSign(buff_orig.get(), buff.get());
+    }
+  }
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/performance_timer.cc b/third_party/libwebrtc/modules/audio_processing/test/performance_timer.cc
new file mode 100644
index 0000000000..1a82258903
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/performance_timer.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/performance_timer.h"
+
+#include <math.h>
+
+#include <numeric>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+PerformanceTimer::PerformanceTimer(int num_frames_to_process)
+    : clock_(webrtc::Clock::GetRealTimeClock()) {
+  timestamps_us_.reserve(num_frames_to_process);
+}
+
+PerformanceTimer::~PerformanceTimer() = default;
+
+void PerformanceTimer::StartTimer() {
+  start_timestamp_us_ = clock_->TimeInMicroseconds();
+}
+
+void PerformanceTimer::StopTimer() {
+  RTC_DCHECK(start_timestamp_us_);
+  timestamps_us_.push_back(clock_->TimeInMicroseconds() -
+                           *start_timestamp_us_);
+}
+
+double PerformanceTimer::GetDurationAverage() const {
+  return GetDurationAverage(0);
+}
+
+double PerformanceTimer::GetDurationStandardDeviation() const {
+  return GetDurationStandardDeviation(0);
+}
+
+double PerformanceTimer::GetDurationAverage(
+    size_t number_of_warmup_samples) const {
+  RTC_DCHECK_GT(timestamps_us_.size(), number_of_warmup_samples);
+  const size_t number_of_samples =
+      timestamps_us_.size() - number_of_warmup_samples;
+  return static_cast<double>(
+             std::accumulate(timestamps_us_.begin() + number_of_warmup_samples,
+                             timestamps_us_.end(), static_cast<int64_t>(0))) /
+         number_of_samples;
+}
+
+double PerformanceTimer::GetDurationStandardDeviation(
+    size_t number_of_warmup_samples) const {
+  RTC_DCHECK_GT(timestamps_us_.size(), number_of_warmup_samples);
+  const size_t number_of_samples =
+      timestamps_us_.size() - number_of_warmup_samples;
+  RTC_DCHECK_GT(number_of_samples, 0);
+  double average_duration = GetDurationAverage(number_of_warmup_samples);
+
+  double variance = std::accumulate(
+      timestamps_us_.begin() + number_of_warmup_samples, timestamps_us_.end(),
+      0.0, [average_duration](const double& a, const int64_t& b) {
+        return a + (b - average_duration) * (b - average_duration);
+      });
+
+  return sqrt(variance / number_of_samples);
+}
+
+}  // namespace test
+}  // namespace webrtc
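The standard-deviation computation above is a population statistic over the timings collected after the warm-up samples are dropped. The same arithmetic as a short Python sketch, for illustration only:

```python
import math

def duration_stats(timestamps_us, warmup=0):
    """Mean and population std-dev of the timings after `warmup` samples."""
    samples = timestamps_us[warmup:]
    assert samples, "need at least one sample after the warm-up"
    mean = sum(samples) / len(samples)
    variance = sum((t - mean) ** 2 for t in samples) / len(samples)
    return mean, math.sqrt(variance)

# duration_stats([120, 90, 92, 91], warmup=1) -> (91.0, 0.816...)
```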
diff --git a/third_party/libwebrtc/modules/audio_processing/test/performance_timer.h b/third_party/libwebrtc/modules/audio_processing/test/performance_timer.h
new file mode 100644
index 0000000000..5375ba74e8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/performance_timer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_PERFORMANCE_TIMER_H_
+#define MODULES_AUDIO_PROCESSING_TEST_PERFORMANCE_TIMER_H_
+
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "system_wrappers/include/clock.h"
+
+namespace webrtc {
+namespace test {
+
+class PerformanceTimer {
+ public:
+  explicit PerformanceTimer(int num_frames_to_process);
+  ~PerformanceTimer();
+
+  void StartTimer();
+  void StopTimer();
+
+  double GetDurationAverage() const;
+  double GetDurationStandardDeviation() const;
+
+  // These methods are the same as those above, but they ignore the first
+  // `number_of_warmup_samples` measurements.
+  double GetDurationAverage(size_t number_of_warmup_samples) const;
+  double GetDurationStandardDeviation(size_t number_of_warmup_samples) const;
+
+ private:
+  webrtc::Clock* clock_;
+  absl::optional<int64_t> start_timestamp_us_;
+  std::vector<int64_t> timestamps_us_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_PERFORMANCE_TIMER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.cc b/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.cc
new file mode 100644
index 0000000000..75574961b0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.cc
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/protobuf_utils.h"
+
+#include <memory>
+
+#include "rtc_base/system/arch.h"
+
+namespace {
+// Allocates new memory in the memory owned by the unique_ptr to fit the raw
+// message and returns the number of bytes read when having a string stream as
+// input.
+size_t ReadMessageBytesFromString(std::stringstream* input,
+                                  std::unique_ptr<uint8_t[]>* bytes) {
+  int32_t size = 0;
+  input->read(reinterpret_cast<char*>(&size), sizeof(int32_t));
+  int32_t size_read = input->gcount();
+  if (size_read != sizeof(int32_t))
+    return 0;
+  if (size <= 0)
+    return 0;
+
+  *bytes = std::make_unique<uint8_t[]>(size);
+  input->read(reinterpret_cast<char*>(bytes->get()),
+              size * sizeof((*bytes)[0]));
+  size_read = input->gcount();
+  return size_read == size ? size : 0;
+}
+}  // namespace
+
+namespace webrtc {
+
+size_t ReadMessageBytesFromFile(FILE* file,
+                                std::unique_ptr<uint8_t[]>* bytes) {
+// The "wire format" for the size is little-endian. Assume we're running on
+// a little-endian machine.
+#ifndef WEBRTC_ARCH_LITTLE_ENDIAN
+#error "Need to convert message from little-endian."
+#endif
+  int32_t size = 0;
+  if (fread(&size, sizeof(size), 1, file) != 1)
+    return 0;
+  if (size <= 0)
+    return 0;
+
+  *bytes = std::make_unique<uint8_t[]>(size);
+  return fread(bytes->get(), sizeof((*bytes)[0]), size, file);
+}
+
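Records in this format are length-prefixed: a little-endian int32 byte count followed by the serialized message bytes. A Python sketch of the same framing, for illustration only:

```python
import struct

def read_message_bytes(stream):
    """Reads one length-prefixed record; returns None on error or EOF."""
    header = stream.read(4)
    if len(header) != 4:
        return None
    (size,) = struct.unpack('<i', header)  # little-endian int32
    if size <= 0:
        return None
    payload = stream.read(size)
    return payload if len(payload) == size else None
```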
+// Returns true on success, false on error or end-of-file.
+bool ReadMessageFromFile(FILE* file, MessageLite* msg) {
+  std::unique_ptr<uint8_t[]> bytes;
+  size_t size = ReadMessageBytesFromFile(file, &bytes);
+  if (!size)
+    return false;
+
+  msg->Clear();
+  return msg->ParseFromArray(bytes.get(), size);
+}
+
+// Returns true on success, false on error or end of string stream.
+bool ReadMessageFromString(std::stringstream* input, MessageLite* msg) {
+  std::unique_ptr<uint8_t[]> bytes;
+  size_t size = ReadMessageBytesFromString(input, &bytes);
+  if (!size)
+    return false;
+
+  msg->Clear();
+  return msg->ParseFromArray(bytes.get(), size);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.h b/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.h
new file mode 100644
index 0000000000..b9c2e819f9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/protobuf_utils.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_PROTOBUF_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_PROTOBUF_UTILS_H_
+
+#include <memory>
+#include <sstream>  // no-presubmit-check TODO(webrtc:8982)
+
+#include "rtc_base/ignore_wundef.h"
+#include "rtc_base/protobuf_utils.h"
+
+RTC_PUSH_IGNORING_WUNDEF()
+#include "modules/audio_processing/debug.pb.h"
+RTC_POP_IGNORING_WUNDEF()
+
+namespace webrtc {
+
+// Allocates new memory in the unique_ptr to fit the raw message and returns
+// the number of bytes read.
+size_t ReadMessageBytesFromFile(FILE* file,
+                                std::unique_ptr<uint8_t[]>* bytes);
+
+// Returns true on success, false on error or end-of-file.
+bool ReadMessageFromFile(FILE* file, MessageLite* msg);
+
+// Returns true on success, false on error or end of string stream.
+bool ReadMessageFromString(
+    std::stringstream* input,  // no-presubmit-check TODO(webrtc:8982)
+    MessageLite* msg);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_PROTOBUF_UTILS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn
new file mode 100644
index 0000000000..e53a829623
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn
@@ -0,0 +1,170 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+ +import("../../../../webrtc.gni") + +if (!build_with_chromium) { + group("py_quality_assessment") { + testonly = true + deps = [ + ":scripts", + ":unit_tests", + ] + } + + copy("scripts") { + testonly = true + sources = [ + "README.md", + "apm_quality_assessment.py", + "apm_quality_assessment.sh", + "apm_quality_assessment_boxplot.py", + "apm_quality_assessment_export.py", + "apm_quality_assessment_gencfgs.py", + "apm_quality_assessment_optimize.py", + ] + outputs = [ "$root_build_dir/py_quality_assessment/{{source_file_part}}" ] + deps = [ + ":apm_configs", + ":lib", + ":output", + "../../../../resources/audio_processing/test/py_quality_assessment:probing_signals", + "../../../../rtc_tools:audioproc_f", + ] + } + + copy("apm_configs") { + testonly = true + sources = [ "apm_configs/default.json" ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = [ + "$root_build_dir/py_quality_assessment/apm_configs/{{source_file_part}}", + ] + } # apm_configs + + copy("lib") { + testonly = true + sources = [ + "quality_assessment/__init__.py", + "quality_assessment/annotations.py", + "quality_assessment/audioproc_wrapper.py", + "quality_assessment/collect_data.py", + "quality_assessment/data_access.py", + "quality_assessment/echo_path_simulation.py", + "quality_assessment/echo_path_simulation_factory.py", + "quality_assessment/eval_scores.py", + "quality_assessment/eval_scores_factory.py", + "quality_assessment/evaluation.py", + "quality_assessment/exceptions.py", + "quality_assessment/export.py", + "quality_assessment/export_unittest.py", + "quality_assessment/external_vad.py", + "quality_assessment/input_mixer.py", + "quality_assessment/input_signal_creator.py", + "quality_assessment/results.css", + "quality_assessment/results.js", + "quality_assessment/signal_processing.py", + "quality_assessment/simulation.py", + "quality_assessment/test_data_generation.py", + "quality_assessment/test_data_generation_factory.py", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = [ "$root_build_dir/py_quality_assessment/quality_assessment/{{source_file_part}}" ] + deps = [ "../../../../resources/audio_processing/test/py_quality_assessment:noise_tracks" ] + } + + copy("output") { + testonly = true + sources = [ "output/README.md" ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = + [ "$root_build_dir/py_quality_assessment/output/{{source_file_part}}" ] + } + + group("unit_tests") { + testonly = true + visibility = [ ":*" ] # Only targets in this file can depend on this. + deps = [ + ":apm_vad", + ":fake_polqa", + ":lib_unit_tests", + ":scripts_unit_tests", + ":vad", + ] + } + + rtc_executable("fake_polqa") { + testonly = true + sources = [ "quality_assessment/fake_polqa.cc" ] + visibility = [ ":*" ] # Only targets in this file can depend on this. 
+ output_dir = "${root_out_dir}/py_quality_assessment/quality_assessment" + deps = [ + "../../../../rtc_base:checks", + "//third_party/abseil-cpp/absl/strings", + ] + } + + rtc_executable("vad") { + testonly = true + sources = [ "quality_assessment/vad.cc" ] + deps = [ + "../../../../common_audio", + "../../../../rtc_base:logging", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } + + rtc_executable("apm_vad") { + testonly = true + sources = [ "quality_assessment/apm_vad.cc" ] + deps = [ + "../..", + "../../../../common_audio", + "../../../../rtc_base:logging", + "../../vad", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } + + rtc_executable("sound_level") { + testonly = true + sources = [ "quality_assessment/sound_level.cc" ] + deps = [ + "../..", + "../../../../common_audio", + "../../../../rtc_base:logging", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } + + copy("lib_unit_tests") { + testonly = true + sources = [ + "quality_assessment/annotations_unittest.py", + "quality_assessment/echo_path_simulation_unittest.py", + "quality_assessment/eval_scores_unittest.py", + "quality_assessment/fake_external_vad.py", + "quality_assessment/input_mixer_unittest.py", + "quality_assessment/signal_processing_unittest.py", + "quality_assessment/simulation_unittest.py", + "quality_assessment/test_data_generation_unittest.py", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = [ "$root_build_dir/py_quality_assessment/quality_assessment/{{source_file_part}}" ] + } + + copy("scripts_unit_tests") { + testonly = true + sources = [ "apm_quality_assessment_unittest.py" ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = [ "$root_build_dir/py_quality_assessment/{{source_file_part}}" ] + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/OWNERS b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/OWNERS new file mode 100644 index 0000000000..9f56bb830d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/OWNERS @@ -0,0 +1,5 @@ +aleloi@webrtc.org +alessiob@webrtc.org +henrik.lundin@webrtc.org +ivoc@webrtc.org +peah@webrtc.org diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/README.md b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/README.md new file mode 100644 index 0000000000..4156112df2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/README.md @@ -0,0 +1,125 @@ +# APM Quality Assessment tool + +Python wrapper of APM simulators (e.g., `audioproc_f`) with which quality +assessment can be automatized. The tool allows to simulate different noise +conditions, input signals, APM configurations and it computes different scores. +Once the scores are computed, the results can be easily exported to an HTML page +which allows to listen to the APM input and output signals and also the +reference one used for evaluation. 
+
+## Dependencies
+ - OS: Linux
+ - Python 2.7
+ - Python libraries: enum34, numpy, scipy, pydub (0.17.0+), pandas (0.20.1+),
+   pyquery (1.2+), jsmin (2.2+), csscompressor (0.9.4)
+ - It is recommended that a dedicated Python environment is used
+   - install `virtualenv`
+     - `$ sudo apt-get install python-virtualenv`
+   - set up a new Python environment (e.g., `my_env`)
+     - `$ cd ~ && virtualenv my_env`
+   - activate the new Python environment
+     - `$ source ~/my_env/bin/activate`
+   - add dependencies via `pip`
+     - `(my_env)$ pip install enum34 numpy pydub scipy pandas pyquery jsmin \`
+       `csscompressor`
+ - PolqaOem64 (see http://www.polqa.info/)
+   - Tested with POLQA Library v1.180 / P863 v2.400
+ - Aachen Impulse Response (AIR) Database
+   - Download https://www2.iks.rwth-aachen.de/air/air_database_release_1_4.zip
+ - Input probing signals and noise tracks (you can make your own dataset - *1)
+
+## Build
+ - Compile WebRTC
+ - Go to `out/Default/py_quality_assessment` and check that
+   `apm_quality_assessment.py` exists
+
+## Unit tests
+ - Compile WebRTC
+ - Go to `out/Default/py_quality_assessment`
+ - Run `python -m unittest discover -p "*_unittest.py"`
+
+## First time setup
+ - Deploy PolqaOem64 and set the `POLQA_PATH` environment variable
+   - e.g., `$ export POLQA_PATH=/var/opt/PolqaOem64`
+ - Deploy the AIR Database and set the `AECHEN_IR_DATABASE_PATH` environment
+   variable
+   - e.g., `$ export AECHEN_IR_DATABASE_PATH=/var/opt/AIR_1_4`
+ - Deploy probing signal tracks into
+   - `out/Default/py_quality_assessment/probing_signals` (*1)
+ - Deploy noise tracks into
+   - `out/Default/py_quality_assessment/noise_tracks` (*1, *2)
+
+(*1) You can use custom files as long as they are mono tracks sampled at 48kHz
+encoded in the 16 bit signed format (it is recommended that the tracks are
+converted and exported with Audacity).
+
+## Usage (scores computation)
+ - Go to `out/Default/py_quality_assessment`
+ - Check `apm_quality_assessment.sh` as an example script showing how to
+   parallelize the experiments
+ - Adjust the script according to your preferences (e.g., output path)
+ - Run `apm_quality_assessment.sh`
+ - The script will end by opening the browser and showing ALL the computed
+   scores
+
+## Usage (export reports)
+Showing all the results at once can be confusing. You therefore may want to
+export separate reports. In this case, you can use the
+`apm_quality_assessment_export.py` script as follows:
+
+ - Set `--output_dir, -o` to the same value used in `apm_quality_assessment.sh`
+ - Use regular expressions to select/filter out scores by
+   - APM configurations: `--config_names, -c`
+   - capture signals: `--capture_names, -i`
+   - render signals: `--render_names, -r`
+   - echo simulator: `--echo_simulator_names, -e`
+   - test data generators: `--test_data_generators, -t`
+   - scores: `--eval_scores, -s`
+ - Assign a suffix to the report name using `-f <suffix>`
+
+For instance:
+
+```
+$ ./apm_quality_assessment_export.py \
+    -o output/ \
+    -c "(^default$)|(.*AE.*)" \
+    -t \(white_noise\) \
+    -s \(polqa\) \
+    -f echo
+```
+
+## Usage (boxplot)
+After generating stats, it can help to visualize how a score depends on a
+certain APM simulator parameter. The `apm_quality_assessment_boxplot.py` script
+helps with that, producing plots similar to [this
+one](https://matplotlib.org/mpl_examples/pylab_examples/boxplot_demo_06.png).
+
+Suppose some scores come from running the APM simulator `audioproc_f` with
+or without the level controller: `--lc=1` or `--lc=0`.
 Then two boxplots side by side can be generated with
+
+```
+$ ./apm_quality_assessment_boxplot.py \
+    -o /path/to/output \
+    -v polqa \
+    -n /path/to/dir/with/apm_configs \
+    -z lc
+```
+
+## Troubleshooting
+The input wav file must be:
+ - sampled at a sample rate that is a multiple of 100 (required by POLQA)
+ - in the 16 bit format (required by `audioproc_f`)
+ - encoded in the Microsoft WAV signed 16 bit PCM format (Audacity default
+   when exporting)
+
+Depending on the license, the POLQA tool may take “breaks” as a way to limit
+the throughput. When this happens, the APM Quality Assessment tool is slowed
+down. For more details about this limitation, check Section 10.9.1 in the
+POLQA manual v.1.18.
+
+In case of issues with the POLQA score computation, check
+`py_quality_assessment/eval_scores.py` and adapt
+`PolqaScore._parse_output_file()`.
+The code can also be fixed directly in the build directory (namely,
+`out/Default/py_quality_assessment/eval_scores.py`).
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json
new file mode 100644
index 0000000000..5c3277bac0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json
@@ -0,0 +1 @@
+{"-all_default": null}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py
new file mode 100755
index 0000000000..e067ecb692
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Perform APM module quality assessment on one or more input files using one
+   or more APM simulator configuration files and one or more test data
+   generators.
+
+Usage: apm_quality_assessment.py -i audio1.wav [audio2.wav ...]
+                                 -c cfg1.json [cfg2.json ...]
+                                 -n white [echo ...]
+                                 -e audio_level [polqa ...]
+                                 -o /path/to/output
+"""
+
+import argparse
+import logging
+import os
+import sys
+
+import quality_assessment.audioproc_wrapper as audioproc_wrapper
+import quality_assessment.echo_path_simulation as echo_path_simulation
+import quality_assessment.eval_scores as eval_scores
+import quality_assessment.evaluation as evaluation
+import quality_assessment.eval_scores_factory as eval_scores_factory
+import quality_assessment.external_vad as external_vad
+import quality_assessment.test_data_generation as test_data_generation
+import quality_assessment.test_data_generation_factory as \
+    test_data_generation_factory
+import quality_assessment.simulation as simulation
+
+_ECHO_PATH_SIMULATOR_NAMES = (
+    echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES)
+_TEST_DATA_GENERATOR_CLASSES = (
+    test_data_generation.TestDataGenerator.REGISTERED_CLASSES)
+_TEST_DATA_GENERATORS_NAMES = _TEST_DATA_GENERATOR_CLASSES.keys()
+_EVAL_SCORE_WORKER_CLASSES = eval_scores.EvaluationScore.REGISTERED_CLASSES
+_EVAL_SCORE_WORKER_NAMES = _EVAL_SCORE_WORKER_CLASSES.keys()
+
+_DEFAULT_CONFIG_FILE = 'apm_configs/default.json'
+
+_POLQA_BIN_NAME = 'PolqaOem64'
+
+
+def _InstanceArgumentsParser():
+    """Arguments parser factory.
+    """
+    parser = argparse.ArgumentParser(description=(
+        'Perform APM module quality assessment on one or more input files '
+        'using one or more APM simulator configuration files and one or more '
+        'test data generators.'))
+
+    parser.add_argument('-c',
+                        '--config_files',
+                        nargs='+',
+                        required=False,
+                        help=('path to the configuration files defining the '
+                              'arguments with which the APM simulator tool is '
+                              'called'),
+                        default=[_DEFAULT_CONFIG_FILE])
+
+    parser.add_argument(
+        '-i',
+        '--capture_input_files',
+        nargs='+',
+        required=True,
+        help='path to the capture input wav files (one or more)')
+
+    parser.add_argument('-r',
+                        '--render_input_files',
+                        nargs='+',
+                        required=False,
+                        help=('path to the render input wav files; either '
+                              'omitted or one file for each file in '
+                              '--capture_input_files (files will be paired by '
+                              'index)'),
+                        default=None)
+
+    parser.add_argument('-p',
+                        '--echo_path_simulator',
+                        required=False,
+                        help=('custom echo path simulator name; required if '
+                              '--render_input_files is specified'),
+                        choices=_ECHO_PATH_SIMULATOR_NAMES,
+                        default=echo_path_simulation.NoEchoPathSimulator.NAME)
+
+    parser.add_argument('-t',
+                        '--test_data_generators',
+                        nargs='+',
+                        required=False,
+                        help='custom list of test data generators to use',
+                        choices=_TEST_DATA_GENERATORS_NAMES,
+                        default=_TEST_DATA_GENERATORS_NAMES)
+
+    parser.add_argument('--additive_noise_tracks_path', required=False,
+                        help=('path to the wav files for the additive noise '
+                              'test data generator'),
+                        default=test_data_generation. \
+                                AdditiveNoiseTestDataGenerator. \
+                                DEFAULT_NOISE_TRACKS_PATH)
+
+    parser.add_argument('-e',
+                        '--eval_scores',
+                        nargs='+',
+                        required=False,
+                        help='custom list of evaluation scores to use',
+                        choices=_EVAL_SCORE_WORKER_NAMES,
+                        default=_EVAL_SCORE_WORKER_NAMES)
+
+    parser.add_argument('-o',
+                        '--output_dir',
+                        required=False,
+                        help=('base path to the output directory in which the '
+                              'output wav files and the evaluation outcomes '
+                              'are saved'),
+                        default='output')
+
+    parser.add_argument('--polqa_path',
+                        required=True,
+                        help='path to the POLQA tool')
+
+    parser.add_argument('--air_db_path',
+                        required=True,
+                        help='path to the Aachen IR database')
+
+    parser.add_argument('--apm_sim_path', required=False,
+                        help='path to the APM simulator tool',
+                        default=audioproc_wrapper. \
+                                AudioProcWrapper. \
+                                DEFAULT_APM_SIMULATOR_BIN_PATH)
+
+    parser.add_argument('--echo_metric_tool_bin_path',
+                        required=False,
+                        help=('path to the echo metric binary '
+                              '(required for the echo eval score)'),
+                        default=None)
+
+    parser.add_argument(
+        '--copy_with_identity_generator',
+        required=False,
+        help=('If true, the identity test data generator makes a '
+              'copy of the clean speech input file.'),
+        default=False)
+
+    parser.add_argument('--external_vad_paths',
+                        nargs='+',
+                        required=False,
+                        help=('Paths to external VAD programs. Each must take '
+                              '\'-i <input> -o <output>\' inputs'),
+                        default=[])
+
+    parser.add_argument('--external_vad_names',
+                        nargs='+',
+                        required=False,
+                        help=('Keys to the vad paths. Must be different and '
+                              'as many as the paths.'),
+                        default=[])
+
+    return parser
+
+
+def _ValidateArguments(args, parser):
+    if args.capture_input_files and args.render_input_files and (len(
+            args.capture_input_files) != len(args.render_input_files)):
+        parser.error('--render_input_files and --capture_input_files must be '
+                     'lists having the same length')
+        sys.exit(1)
+
+    if args.render_input_files and not args.echo_path_simulator:
+        parser.error('when --render_input_files is set, '
+                     '--echo_path_simulator is also required')
+        sys.exit(1)
+
+    if len(args.external_vad_names) != len(args.external_vad_paths):
+        parser.error('If provided, --external_vad_paths and '
+                     '--external_vad_names must '
+                     'have the same number of arguments.')
+        sys.exit(1)
+
+
+def main():
+    # TODO(alessiob): level = logging.INFO once debugged.
+    logging.basicConfig(level=logging.DEBUG)
+    parser = _InstanceArgumentsParser()
+    args = parser.parse_args()
+    _ValidateArguments(args, parser)
+
+    simulator = simulation.ApmModuleSimulator(
+        test_data_generator_factory=(
+            test_data_generation_factory.TestDataGeneratorFactory(
+                aechen_ir_database_path=args.air_db_path,
+                noise_tracks_path=args.additive_noise_tracks_path,
+                copy_with_identity=args.copy_with_identity_generator)),
+        evaluation_score_factory=eval_scores_factory.
+        EvaluationScoreWorkerFactory(
+            polqa_tool_bin_path=os.path.join(args.polqa_path,
+                                             _POLQA_BIN_NAME),
+            echo_metric_tool_bin_path=args.echo_metric_tool_bin_path),
+        ap_wrapper=audioproc_wrapper.AudioProcWrapper(args.apm_sim_path),
+        evaluator=evaluation.ApmModuleEvaluator(),
+        external_vads=external_vad.ExternalVad.ConstructVadDict(
+            args.external_vad_paths, args.external_vad_names))
+    simulator.Run(config_filepaths=args.config_files,
+                  capture_input_filepaths=args.capture_input_files,
+                  render_input_filepaths=args.render_input_files,
+                  echo_path_simulator_name=args.echo_path_simulator,
+                  test_data_generator_names=args.test_data_generators,
+                  eval_score_names=args.eval_scores,
+                  output_dir=args.output_dir)
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh
new file mode 100755
index 0000000000..aa563ee26b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+# Path to the POLQA tool.
+if [ -z ${POLQA_PATH} ]; then  # Check if defined.
+  # Default location.
+  export POLQA_PATH='/var/opt/PolqaOem64'
+fi
+if [ -d "${POLQA_PATH}" ]; then
+  echo "POLQA found in ${POLQA_PATH}"
+else
+  echo "POLQA not found in ${POLQA_PATH}"
+  exit 1
+fi
+
+# Path to the Aachen IR database.
+if [ -z ${AECHEN_IR_DATABASE_PATH} ]; then  # Check if defined.
+  # Default location.
+  export AECHEN_IR_DATABASE_PATH='/var/opt/AIR_1_4'
+fi
+if [ -d "${AECHEN_IR_DATABASE_PATH}" ]; then
+  echo "AIR database found in ${AECHEN_IR_DATABASE_PATH}"
+else
+  echo "AIR database not found in ${AECHEN_IR_DATABASE_PATH}"
+  exit 1
+fi
+
+# Customize probing signals, test data generators and scores if needed.
+CAPTURE_SIGNALS=(probing_signals/*.wav)
+TEST_DATA_GENERATORS=( \
+  "identity" \
+  "white_noise" \
+  # "environmental_noise" \
+  # "reverberation" \
+)
+SCORES=( \
+  # "polqa" \
+  "audio_level_peak" \
+  "audio_level_mean" \
+)
+OUTPUT_PATH=output
+
+# Generate standard APM config files.
+chmod +x apm_quality_assessment_gencfgs.py
+./apm_quality_assessment_gencfgs.py
+
+# Customize APM configurations if needed.
+APM_CONFIGS=(apm_configs/*.json)
+
+# Add output path if missing.
+if [ ! -d ${OUTPUT_PATH} ]; then
+  mkdir ${OUTPUT_PATH}
+fi
+
+# Start one process for each "probing signal"-"test data source" pair.
+chmod +x apm_quality_assessment.py
+for capture_signal_filepath in "${CAPTURE_SIGNALS[@]}" ; do
+  probing_signal_name="$(basename $capture_signal_filepath)"
+  probing_signal_name="${probing_signal_name%.*}"
+  for test_data_gen_name in "${TEST_DATA_GENERATORS[@]}" ; do
+    LOG_FILE="${OUTPUT_PATH}/apm_qa-${probing_signal_name}-"`
+        `"${test_data_gen_name}.log"
+    echo "Starting ${probing_signal_name} ${test_data_gen_name} "`
+        `"(see ${LOG_FILE})"
+    ./apm_quality_assessment.py \
+        --polqa_path ${POLQA_PATH}\
+        --air_db_path ${AECHEN_IR_DATABASE_PATH}\
+        -i ${capture_signal_filepath} \
+        -o ${OUTPUT_PATH} \
+        -t ${test_data_gen_name} \
+        -c "${APM_CONFIGS[@]}" \
+        -e "${SCORES[@]}" > $LOG_FILE 2>&1 &
+  done
+done
+
+# Join Python processes running apm_quality_assessment.py.
+wait
+
+# Export results.
+chmod +x ./apm_quality_assessment_export.py
+./apm_quality_assessment_export.py -o ${OUTPUT_PATH}
+
+# Show results in the browser.
+RESULTS_FILE="$(realpath ${OUTPUT_PATH}/results.html)"
+sensible-browser "file://${RESULTS_FILE}" > /dev/null 2>&1 &
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py
new file mode 100644
index 0000000000..c425885b95
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Shows boxplots of a given score for different values of selected
+parameters. Can be used to compare scores by audioproc_f flag.
+
+Usage: apm_quality_assessment_boxplot.py -o /path/to/output
+                                         -v polqa
+                                         -n /path/to/dir/with/apm_configs
+                                         -z audioproc_f_arg1 [arg2 ...]
+
+Arguments --config_names, --render_names, --echo_simulator_names,
+--test_data_generators, --eval_scores can be used to filter the data
+used for plotting.
+"""
+
+import collections
+import logging
+import matplotlib.pyplot as plt
+import os
+
+import quality_assessment.data_access as data_access
+import quality_assessment.collect_data as collect_data
+
+
+def InstanceArgumentsParser():
+    """Arguments parser factory.
+    """
+    parser = collect_data.InstanceArgumentsParser()
+    parser.description = (
+        'Shows boxplot of given score for different values of selected '
+        'parameters. Can be used to compare scores by audioproc_f flag')
+
+    parser.add_argument('-v',
+                        '--eval_score',
+                        required=True,
+                        help=('Score name for constructing boxplots'))
+
+    parser.add_argument(
+        '-n',
+        '--config_dir',
+        required=False,
+        help=('path to the folder with the configuration files'),
+        default='apm_configs')
+
+    parser.add_argument('-z',
+                        '--params_to_plot',
+                        required=True,
+                        nargs='+',
+                        help=('audioproc_f parameter values '
+                              'by which to group scores (no leading dash)'))
+
+    return parser
+
+
+def FilterScoresByParams(data_frame, filter_params, score_name, config_dir):
+    """Filters data on the values of one or more parameters.
+
+    Args:
+      data_frame: pandas.DataFrame of all used input data.
+
+      filter_params: each config of the input data is assumed to have
+        exactly one parameter from `filter_params` defined. Every value
+        of the parameters in `filter_params` is a key in the returned
+        dict; the associated value is all cells of the data with that
+        value of the parameter.
+
+      score_name: name of the score whose values are boxplotted. Currently
+        only a single score is supported.
+
+      config_dir: path to dir with APM configs.
+
+    Returns: dictionary mapping each parameter value to all scores for
+      that param value (see `filter_params` for explanation).
+    """
+    results = collections.defaultdict(dict)
+    config_names = data_frame['apm_config'].drop_duplicates().values.tolist()
+
+    for config_name in config_names:
+        config_json = data_access.AudioProcConfigFile.Load(
+            os.path.join(config_dir, config_name + '.json'))
+        data_with_config = data_frame[data_frame.apm_config == config_name]
+        data_cell_scores = data_with_config[
+            data_with_config.eval_score_name == score_name]
+
+        # Exactly one of `params_to_plot` must match:
+        (matching_param, ) = [
+            x for x in filter_params if '-' + x in config_json
+        ]
+
+        # Add scores for every track to the result.
+        for capture_name in data_cell_scores.capture:
+            result_score = float(data_cell_scores[
+                data_cell_scores.capture == capture_name].score)
+            config_dict = results[config_json['-' + matching_param]]
+            if capture_name not in config_dict:
+                config_dict[capture_name] = {}
+
+            config_dict[capture_name][matching_param] = result_score
+
+    return results
+
+
+def _FlattenToScoresList(config_param_score_dict):
+    """Extracts a list of scores from input data structure.
+
+    Args:
+      config_param_score_dict: of the form {'capture_name':
+        {'param_name' : score_value,.. } ..}
+
+    Returns: Plain list of all score values present in input data
+      structure
+    """
+    result = []
+    for capture_name in config_param_score_dict:
+        result += list(config_param_score_dict[capture_name].values())
+    return result
+
+
+def main():
+    # Init.
+    # TODO(alessiob): INFO once debugged.
+    logging.basicConfig(level=logging.DEBUG)
+    parser = InstanceArgumentsParser()
+    args = parser.parse_args()
+
+    # Get the scores.
+    src_path = collect_data.ConstructSrcPath(args)
+    logging.debug(src_path)
+    scores_data_frame = collect_data.FindScores(src_path, args)
+
+    # Filter the data by `args.params_to_plot`
+    scores_filtered = FilterScoresByParams(scores_data_frame,
+                                           args.params_to_plot,
+                                           args.eval_score, args.config_dir)
+
+    data_list = sorted(scores_filtered.items())
+    data_values = [_FlattenToScoresList(x) for (_, x) in data_list]
+    data_labels = [x for (x, _) in data_list]
+
+    _, axes = plt.subplots(nrows=1, ncols=1, figsize=(6, 6))
+    axes.boxplot(data_values, labels=data_labels)
+    axes.set_ylabel(args.eval_score)
+    axes.set_xlabel('/'.join(args.params_to_plot))
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py
new file mode 100755
index 0000000000..c20accb9dc
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Export the scores computed by the apm_quality_assessment.py script into an
+   HTML file.
+"""
+
+import logging
+import os
+import sys
+
+import quality_assessment.collect_data as collect_data
+import quality_assessment.export as export
+
+
+def _BuildOutputFilename(filename_suffix):
+    """Builds the filename for the exported file.
+
+    Args:
+      filename_suffix: suffix for the output file name.
+
+    Returns:
+      A string.
+    """
+    if filename_suffix is None:
+        return 'results.html'
+    return 'results-{}.html'.format(filename_suffix)
+
+
+def main():
+    # Init.
+    logging.basicConfig(
+        level=logging.DEBUG)  # TODO(alessio): INFO once debugged.
+    parser = collect_data.InstanceArgumentsParser()
+    parser.add_argument('-f',
+                        '--filename_suffix',
+                        help=('suffix of the exported file'))
+    parser.description = ('Exports pre-computed APM module quality assessment '
+                          'results into HTML tables')
+    args = parser.parse_args()
+
+    # Get the scores.
+    src_path = collect_data.ConstructSrcPath(args)
+    logging.debug(src_path)
+    scores_data_frame = collect_data.FindScores(src_path, args)
+
+    # Export.
+    output_filepath = os.path.join(args.output_dir,
+                                   _BuildOutputFilename(args.filename_suffix))
+    exporter = export.HtmlExport(output_filepath)
+    exporter.Export(scores_data_frame)
+
+    logging.info('output file successfully written in %s', output_filepath)
+    sys.exit(0)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py
new file mode 100755
index 0000000000..ca80f85bd1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Generate .json files with which the APM module can be tested using the
+   apm_quality_assessment.py script and audioproc_f as APM simulator.
+"""
+
+import logging
+import os
+
+import quality_assessment.data_access as data_access
+
+OUTPUT_PATH = os.path.abspath('apm_configs')
+
+
+def _GenerateDefaultOverridden(config_override):
+    """Generates one or more APM overridden configurations.
+
+    For each item in config_override, it overrides the default configuration
+    and writes a new APM configuration file.
+
+    The default settings are loaded via "-all_default".
+    Check "src/modules/audio_processing/test/audioproc_float.cc" and search
+    for "if (FLAG_all_default) {".
+
+    For instance, in 55eb6d621489730084927868fed195d3645a9ec9 the default is
+    this:
+      settings.use_aec = rtc::Optional<bool>(true);
+      settings.use_aecm = rtc::Optional<bool>(false);
+      settings.use_agc = rtc::Optional<bool>(true);
+      settings.use_bf = rtc::Optional<bool>(false);
+      settings.use_ed = rtc::Optional<bool>(false);
+      settings.use_hpf = rtc::Optional<bool>(true);
+      settings.use_le = rtc::Optional<bool>(true);
+      settings.use_ns = rtc::Optional<bool>(true);
+      settings.use_ts = rtc::Optional<bool>(true);
+      settings.use_vad = rtc::Optional<bool>(true);
+
+    Args:
+      config_override: dict of APM configuration file names as keys; the
+        values are dict instances encoding the audioproc_f flags.
+    """
+    for config_filename in config_override:
+        config = config_override[config_filename]
+        config['-all_default'] = None
+
+        config_filepath = os.path.join(
+            OUTPUT_PATH, 'default-{}.json'.format(config_filename))
+        logging.debug('config file <%s> | %s', config_filepath, config)
+
+        data_access.AudioProcConfigFile.Save(config_filepath, config)
+        logging.info('config file created: <%s>', config_filepath)
+
+
+def _GenerateAllDefaultButOne():
+    """Disables the flags enabled by default one-by-one.
+    """
+    config_sets = {
+        'no_AEC': {
+            '-aec': 0,
+        },
+        'no_AGC': {
+            '-agc': 0,
+        },
+        'no_HP_filter': {
+            '-hpf': 0,
+        },
+        'no_level_estimator': {
+            '-le': 0,
+        },
+        'no_noise_suppressor': {
+            '-ns': 0,
+        },
+        'no_transient_suppressor': {
+            '-ts': 0,
+        },
+        'no_vad': {
+            '-vad': 0,
+        },
+    }
+    _GenerateDefaultOverridden(config_sets)
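To make the override mechanism concrete, here is what one entry of an override dict turns into on disk (hypothetical call; the JSON shape follows apm_configs/default.json above):

```python
# Sketch: _GenerateDefaultOverridden({'no_AGC': {'-agc': 0}}) creates
# apm_configs/default-no_AGC.json containing:
#
#   {"-agc": 0, "-all_default": null}
#
# i.e. the override flags plus the '-all_default' marker that the function
# adds to every generated config.
```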
+        'with_AGC_limiter': {
+            '-agc_limiter': 1,
+        },
+        'with_AEC_delay_agnostic': {
+            '-delay_agnostic': 1,
+        },
+        'with_drift_compensation': {
+            '-drift_compensation': 1,
+        },
+        'with_residual_echo_detector': {
+            '-ed': 1,
+        },
+        'with_AEC_extended_filter': {
+            '-extended_filter': 1,
+        },
+        'with_LC': {
+            '-lc': 1,
+        },
+        'with_refined_adaptive_filter': {
+            '-refined_adaptive_filter': 1,
+        },
+    }
+    _GenerateDefaultOverridden(config_sets)
+
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+    _GenerateAllDefaultPlusOne()
+    _GenerateAllDefaultButOne()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py
new file mode 100644
index 0000000000..ecae2ed995
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Finds the APM configuration that maximizes a provided metric by
+parsing the output generated by apm_quality_assessment.py.
+"""
+
+from __future__ import division
+
+import collections
+import logging
+import os
+
+import quality_assessment.data_access as data_access
+import quality_assessment.collect_data as collect_data
+
+
+def _InstanceArgumentsParser():
+    """Arguments parser factory. Extends the arguments from 'collect_data'
+    with a few extra for selecting what parameters to optimize for.
+    """
+    parser = collect_data.InstanceArgumentsParser()
+    parser.description = (
+        'Rudimentary optimization of a function over different parameter '
+        'combinations.')
+
+    parser.add_argument(
+        '-n',
+        '--config_dir',
+        required=False,
+        help=('path to the folder with the configuration files'),
+        default='apm_configs')
+
+    parser.add_argument('-p',
+                        '--params',
+                        required=True,
+                        nargs='+',
+                        help=('parameters to parse from the config files in '
+                              'config_dir'))
+
+    parser.add_argument(
+        '-z',
+        '--params_not_to_optimize',
+        required=False,
+        nargs='+',
+        default=[],
+        help=('parameters from `params` not to be optimized for'))
+
+    return parser
+
+
+def _ConfigurationAndScores(data_frame, params, params_not_to_optimize,
+                            config_dir):
+    """Returns all configurations and their scores.
+
+    Args:
+      data_frame: A pandas data frame with the scores and config name
+                  returned by collect_data.FindScores.
+      params: The parameter names to parse from the config files in the
+              config directory.
+      params_not_to_optimize: The parameter names which shouldn't affect
+                              the optimal parameter selection, e.g., fixed
+                              settings and non-tunable parameters.
+      config_dir: Path to folder with config files.
+
+    Returns:
+      Dictionary of the form
+      {param_combination: [{params: {param1: value1, ...},
+                            scores: {score1: value1, ...}}]}.
+
+      The key `param_combination` runs over all parameter combinations
+      of the parameters in `params` and not in
+      `params_not_to_optimize`. A corresponding value is a list of all
+      param combinations for params in `params_not_to_optimize` and
+      their scores.
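+
+      For example (illustrative names only), with params=['agc', 'ns']
+      and params_not_to_optimize=['ns'], each key is a ParamCombination
+      with a single `agc` field, and its value lists one entry per '-ns'
+      setting found in the config files, together with that config's
+      normalized scores.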
+ """ + results = collections.defaultdict(list) + config_names = data_frame['apm_config'].drop_duplicates().values.tolist() + score_names = data_frame['eval_score_name'].drop_duplicates( + ).values.tolist() + + # Normalize the scores + normalization_constants = {} + for score_name in score_names: + scores = data_frame[data_frame.eval_score_name == score_name].score + normalization_constants[score_name] = max(scores) + + params_to_optimize = [p for p in params if p not in params_not_to_optimize] + param_combination = collections.namedtuple("ParamCombination", + params_to_optimize) + + for config_name in config_names: + config_json = data_access.AudioProcConfigFile.Load( + os.path.join(config_dir, config_name + ".json")) + scores = {} + data_cell = data_frame[data_frame.apm_config == config_name] + for score_name in score_names: + data_cell_scores = data_cell[data_cell.eval_score_name == + score_name].score + scores[score_name] = sum(data_cell_scores) / len(data_cell_scores) + scores[score_name] /= normalization_constants[score_name] + + result = {'scores': scores, 'params': {}} + config_optimize_params = {} + for param in params: + if param in params_to_optimize: + config_optimize_params[param] = config_json['-' + param] + else: + result['params'][param] = config_json['-' + param] + + current_param_combination = param_combination(**config_optimize_params) + results[current_param_combination].append(result) + return results + + +def _FindOptimalParameter(configs_and_scores, score_weighting): + """Finds the config producing the maximal score. + + Args: + configs_and_scores: structure of the form returned by + _ConfigurationAndScores + + score_weighting: a function to weight together all score values of + the form [{params: {param1: value1, ...}, scores: + {score1: value1, ...}}] into a numeric + value + Returns: + the config that has the largest values of `score_weighting` applied + to its scores. + """ + + min_score = float('+inf') + best_params = None + for config in configs_and_scores: + scores_and_params = configs_and_scores[config] + current_score = score_weighting(scores_and_params) + if current_score < min_score: + min_score = current_score + best_params = config + logging.debug("Score: %f", current_score) + logging.debug("Config: %s", str(config)) + return best_params + + +def _ExampleWeighting(scores_and_configs): + """Example argument to `_FindOptimalParameter` + Args: + scores_and_configs: a list of configs and scores, in the form + described in _FindOptimalParameter + Returns: + numeric value, the sum of all scores + """ + res = 0 + for score_config in scores_and_configs: + res += sum(score_config['scores'].values()) + return res + + +def main(): + # Init. + # TODO(alessiob): INFO once debugged. + logging.basicConfig(level=logging.DEBUG) + parser = _InstanceArgumentsParser() + args = parser.parse_args() + + # Get the scores. 
+    src_path = collect_data.ConstructSrcPath(args)
+    logging.debug('Src path <%s>', src_path)
+    scores_data_frame = collect_data.FindScores(src_path, args)
+    all_scores = _ConfigurationAndScores(scores_data_frame, args.params,
+                                         args.params_not_to_optimize,
+                                         args.config_dir)
+
+    opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting)
+
+    logging.info('Optimal parameter combination: <%s>', opt_param)
+    logging.info('Its score values: <%s>', all_scores[opt_param])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py
new file mode 100644
index 0000000000..80338c1373
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Unit tests for the apm_quality_assessment module.
+"""
+
+import sys
+import unittest
+
+import mock
+
+import apm_quality_assessment
+
+
+class TestSimulationScript(unittest.TestCase):
+    """Unit tests for the apm_quality_assessment module.
+    """
+
+    def testMain(self):
+        # Exit with error code if no arguments are passed.
+        with self.assertRaises(SystemExit) as cm, mock.patch.object(
+                sys, 'argv', ['apm_quality_assessment.py']):
+            apm_quality_assessment.main()
+        self.assertGreater(cm.exception.code, 0)
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/output/README.md b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/output/README.md
new file mode 100644
index 0000000000..66e2a1c848
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/output/README.md
@@ -0,0 +1 @@
+You can use this folder for the output generated by the apm_quality_assessment scripts.
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py
new file mode 100644
index 0000000000..b870dfaef3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
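
The optimizer in apm_quality_assessment_optimize.py accepts any weighting callable with the same signature as `_ExampleWeighting`. A minimal sketch of an alternative weighting, assuming the `scores_and_configs` structure documented in `_FindOptimalParameter` (the per-score weights below are hypothetical):

```python
# Hypothetical per-score weights; unlisted scores default to weight 1.0.
_SCORE_WEIGHTS = {'polqa': 2.0}


def WeightedSumWeighting(scores_and_configs):
    """Like _ExampleWeighting, but weights each score by its name first."""
    res = 0.0
    for score_config in scores_and_configs:
        for name, value in score_config['scores'].items():
            res += _SCORE_WEIGHTS.get(name, 1.0) * value
    return res
```

Passing it as `_FindOptimalParameter(all_scores, WeightedSumWeighting)` ranks configurations by the weighted sum instead of the plain sum.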
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py
new file mode 100644
index 0000000000..93a8248397
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py
@@ -0,0 +1,296 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Extraction of annotations from audio files.
+"""
+
+from __future__ import division
+import logging
+import os
+import shutil
+import struct
+import subprocess
+import sys
+import tempfile
+
+try:
+    import numpy as np
+except ImportError:
+    logging.critical('Cannot import the third-party Python package numpy')
+    sys.exit(1)
+
+from . import external_vad
+from . import exceptions
+from . import signal_processing
+
+
+class AudioAnnotationsExtractor(object):
+    """Extracts annotations from audio files.
+    """
+
+    class VadType(object):
+        ENERGY_THRESHOLD = 1  # TODO(alessiob): Consider switching to P56 standard.
+        WEBRTC_COMMON_AUDIO = 2  # common_audio/vad/include/vad.h
+        WEBRTC_APM = 4  # modules/audio_processing/vad/vad.h
+
+        def __init__(self, value):
+            if (not isinstance(value, int)) or not 0 <= value <= 7:
+                raise exceptions.InitializationException(
+                    'Invalid vad type: ' + str(value))
+            self._value = value
+
+        def Contains(self, vad_type):
+            return self._value | vad_type == self._value
+
+        def __str__(self):
+            vads = []
+            if self.Contains(self.ENERGY_THRESHOLD):
+                vads.append("energy")
+            if self.Contains(self.WEBRTC_COMMON_AUDIO):
+                vads.append("common_audio")
+            if self.Contains(self.WEBRTC_APM):
+                vads.append("apm")
+            return "VadType({})".format(", ".join(vads))
+
+    _OUTPUT_FILENAME_TEMPLATE = '{}annotations.npz'
+
+    # Level estimation params.
+    _ONE_DB_REDUCTION = np.power(10.0, -1.0 / 20.0)
+    _LEVEL_FRAME_SIZE_MS = 1.0
+    # The time constants in ms indicate the time it takes for the level
+    # estimate to go down/up by 1 dB if the signal is zero.
+    _LEVEL_ATTACK_MS = 5.0
+    _LEVEL_DECAY_MS = 20.0
+
+    # VAD params.
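+    # Percentile of the level envelope used as threshold by the energy VAD
+    # (see Extract()).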
+    _VAD_THRESHOLD = 1
+    _VAD_WEBRTC_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                    os.pardir, os.pardir)
+    _VAD_WEBRTC_COMMON_AUDIO_PATH = os.path.join(_VAD_WEBRTC_PATH, 'vad')
+
+    _VAD_WEBRTC_APM_PATH = os.path.join(_VAD_WEBRTC_PATH, 'apm_vad')
+
+    def __init__(self, vad_type, external_vads=None):
+        self._signal = None
+        self._level = None
+        self._level_frame_size = None
+        self._common_audio_vad = None
+        self._energy_vad = None
+        self._apm_vad_probs = None
+        self._apm_vad_rms = None
+        self._vad_frame_size = None
+        self._vad_frame_size_ms = None
+        self._c_attack = None
+        self._c_decay = None
+
+        self._vad_type = self.VadType(vad_type)
+        logging.info('VADs used for annotations: ' + str(self._vad_type))
+
+        if external_vads is None:
+            external_vads = {}
+        self._external_vads = external_vads
+
+        assert len(self._external_vads) == len(external_vads), (
+            'The external VAD names must be unique.')
+        for vad in external_vads.values():
+            if not isinstance(vad, external_vad.ExternalVad):
+                raise exceptions.InitializationException(
+                    'Invalid vad type: ' + str(type(vad)))
+            logging.info('External VAD used for annotation: ' + str(vad.name))
+
+        assert os.path.exists(self._VAD_WEBRTC_COMMON_AUDIO_PATH), \
+            self._VAD_WEBRTC_COMMON_AUDIO_PATH
+        assert os.path.exists(self._VAD_WEBRTC_APM_PATH), \
+            self._VAD_WEBRTC_APM_PATH
+
+    @classmethod
+    def GetOutputFileNameTemplate(cls):
+        return cls._OUTPUT_FILENAME_TEMPLATE
+
+    def GetLevel(self):
+        return self._level
+
+    def GetLevelFrameSize(self):
+        return self._level_frame_size
+
+    @classmethod
+    def GetLevelFrameSizeMs(cls):
+        return cls._LEVEL_FRAME_SIZE_MS
+
+    def GetVadOutput(self, vad_type):
+        if vad_type == self.VadType.ENERGY_THRESHOLD:
+            return self._energy_vad
+        elif vad_type == self.VadType.WEBRTC_COMMON_AUDIO:
+            return self._common_audio_vad
+        elif vad_type == self.VadType.WEBRTC_APM:
+            return (self._apm_vad_probs, self._apm_vad_rms)
+        else:
+            raise exceptions.InitializationException(
+                'Invalid vad type: ' + str(vad_type))
+
+    def GetVadFrameSize(self):
+        return self._vad_frame_size
+
+    def GetVadFrameSizeMs(self):
+        return self._vad_frame_size_ms
+
+    def Extract(self, filepath):
+        # Load signal.
+        self._signal = signal_processing.SignalProcessingUtils.LoadWav(
+            filepath)
+        if self._signal.channels != 1:
+            raise NotImplementedError(
+                'Multiple-channel annotations not implemented')
+
+        # Level estimation params.
+        self._level_frame_size = int(self._signal.frame_rate / 1000 *
+                                     (self._LEVEL_FRAME_SIZE_MS))
+        self._c_attack = 0.0 if self._LEVEL_ATTACK_MS == 0 else (
+            self._ONE_DB_REDUCTION**(self._LEVEL_FRAME_SIZE_MS /
+                                     self._LEVEL_ATTACK_MS))
+        self._c_decay = 0.0 if self._LEVEL_DECAY_MS == 0 else (
+            self._ONE_DB_REDUCTION**(self._LEVEL_FRAME_SIZE_MS /
+                                     self._LEVEL_DECAY_MS))
+
+        # Compute level.
+        self._LevelEstimation()
+
+        # Ideal VAD output; it requires clean speech with a high SNR as input.
+        if self._vad_type.Contains(self.VadType.ENERGY_THRESHOLD):
+            # Naive VAD based on level thresholding.
+            vad_threshold = np.percentile(self._level, self._VAD_THRESHOLD)
+            self._energy_vad = np.uint8(self._level > vad_threshold)
+            self._vad_frame_size = self._level_frame_size
+            self._vad_frame_size_ms = self._LEVEL_FRAME_SIZE_MS
+        if self._vad_type.Contains(self.VadType.WEBRTC_COMMON_AUDIO):
+            # WebRTC common_audio/ VAD.
+            self._RunWebRtcCommonAudioVad(filepath, self._signal.frame_rate)
+        if self._vad_type.Contains(self.VadType.WEBRTC_APM):
+            # WebRTC modules/audio_processing/ VAD.
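+            # Produces both voice probabilities and RMS values; they are
+            # retrieved via GetVadOutput(VadType.WEBRTC_APM).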
+ self._RunWebRtcApmVad(filepath) + for extvad_name in self._external_vads: + self._external_vads[extvad_name].Run(filepath) + + def Save(self, output_path, annotation_name=""): + ext_kwargs = { + 'extvad_conf-' + ext_vad: + self._external_vads[ext_vad].GetVadOutput() + for ext_vad in self._external_vads + } + np.savez_compressed(file=os.path.join( + output_path, + self.GetOutputFileNameTemplate().format(annotation_name)), + level=self._level, + level_frame_size=self._level_frame_size, + level_frame_size_ms=self._LEVEL_FRAME_SIZE_MS, + vad_output=self._common_audio_vad, + vad_energy_output=self._energy_vad, + vad_frame_size=self._vad_frame_size, + vad_frame_size_ms=self._vad_frame_size_ms, + vad_probs=self._apm_vad_probs, + vad_rms=self._apm_vad_rms, + **ext_kwargs) + + def _LevelEstimation(self): + # Read samples. + samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData( + self._signal).astype(np.float32) / 32768.0 + num_frames = len(samples) // self._level_frame_size + num_samples = num_frames * self._level_frame_size + + # Envelope. + self._level = np.max(np.reshape(np.abs(samples[:num_samples]), + (num_frames, self._level_frame_size)), + axis=1) + assert len(self._level) == num_frames + + # Envelope smoothing. + smooth = lambda curr, prev, k: (1 - k) * curr + k * prev + self._level[0] = smooth(self._level[0], 0.0, self._c_attack) + for i in range(1, num_frames): + self._level[i] = smooth( + self._level[i], self._level[i - 1], self._c_attack if + (self._level[i] > self._level[i - 1]) else self._c_decay) + + def _RunWebRtcCommonAudioVad(self, wav_file_path, sample_rate): + self._common_audio_vad = None + self._vad_frame_size = None + + # Create temporary output path. + tmp_path = tempfile.mkdtemp() + output_file_path = os.path.join( + tmp_path, + os.path.split(wav_file_path)[1] + '_vad.tmp') + + # Call WebRTC VAD. + try: + subprocess.call([ + self._VAD_WEBRTC_COMMON_AUDIO_PATH, '-i', wav_file_path, '-o', + output_file_path + ], + cwd=self._VAD_WEBRTC_PATH) + + # Read bytes. + with open(output_file_path, 'rb') as f: + raw_data = f.read() + + # Parse side information. + self._vad_frame_size_ms = struct.unpack('B', raw_data[0])[0] + self._vad_frame_size = self._vad_frame_size_ms * sample_rate / 1000 + assert self._vad_frame_size_ms in [10, 20, 30] + extra_bits = struct.unpack('B', raw_data[-1])[0] + assert 0 <= extra_bits <= 8 + + # Init VAD vector. + num_bytes = len(raw_data) + num_frames = 8 * (num_bytes - + 2) - extra_bits # 8 frames for each byte. + self._common_audio_vad = np.zeros(num_frames, np.uint8) + + # Read VAD decisions. + for i, byte in enumerate(raw_data[1:-1]): + byte = struct.unpack('B', byte)[0] + for j in range(8 if i < num_bytes - 3 else (8 - extra_bits)): + self._common_audio_vad[i * 8 + j] = int(byte & 1) + byte = byte >> 1 + except Exception as e: + logging.error('Error while running the WebRTC VAD (' + e.message + + ')') + finally: + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + + def _RunWebRtcApmVad(self, wav_file_path): + # Create temporary output path. + tmp_path = tempfile.mkdtemp() + output_file_path_probs = os.path.join( + tmp_path, + os.path.split(wav_file_path)[1] + '_vad_probs.tmp') + output_file_path_rms = os.path.join( + tmp_path, + os.path.split(wav_file_path)[1] + '_vad_rms.tmp') + + # Call WebRTC VAD. + try: + subprocess.call([ + self._VAD_WEBRTC_APM_PATH, '-i', wav_file_path, '-o_probs', + output_file_path_probs, '-o_rms', output_file_path_rms + ], + cwd=self._VAD_WEBRTC_PATH) + + # Parse annotations. 
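+            # The apm_vad tool writes raw 8-byte doubles, one per chunk, which
+            # np.fromfile() reads back directly.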
+ self._apm_vad_probs = np.fromfile(output_file_path_probs, + np.double) + self._apm_vad_rms = np.fromfile(output_file_path_rms, np.double) + assert len(self._apm_vad_rms) == len(self._apm_vad_probs) + + except Exception as e: + logging.error('Error while running the WebRTC APM VAD (' + + e.message + ')') + finally: + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py new file mode 100644 index 0000000000..8230208808 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py @@ -0,0 +1,160 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the annotations module. +""" + +from __future__ import division +import logging +import os +import shutil +import tempfile +import unittest + +import numpy as np + +from . import annotations +from . import external_vad +from . import input_signal_creator +from . import signal_processing + + +class TestAnnotationsExtraction(unittest.TestCase): + """Unit tests for the annotations module. + """ + + _CLEAN_TMP_OUTPUT = True + _DEBUG_PLOT_VAD = False + _VAD_TYPE_CLASS = annotations.AudioAnnotationsExtractor.VadType + _ALL_VAD_TYPES = (_VAD_TYPE_CLASS.ENERGY_THRESHOLD + | _VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO + | _VAD_TYPE_CLASS.WEBRTC_APM) + + def setUp(self): + """Create temporary folder.""" + self._tmp_path = tempfile.mkdtemp() + self._wav_file_path = os.path.join(self._tmp_path, 'tone.wav') + pure_tone, _ = input_signal_creator.InputSignalCreator.Create( + 'pure_tone', [440, 1000]) + signal_processing.SignalProcessingUtils.SaveWav( + self._wav_file_path, pure_tone) + self._sample_rate = pure_tone.frame_rate + + def tearDown(self): + """Recursively delete temporary folder.""" + if self._CLEAN_TMP_OUTPUT: + shutil.rmtree(self._tmp_path) + else: + logging.warning(self.id() + ' did not clean the temporary path ' + + (self._tmp_path)) + + def testFrameSizes(self): + e = annotations.AudioAnnotationsExtractor(self._ALL_VAD_TYPES) + e.Extract(self._wav_file_path) + samples_to_ms = lambda n, sr: 1000 * n // sr + self.assertEqual( + samples_to_ms(e.GetLevelFrameSize(), self._sample_rate), + e.GetLevelFrameSizeMs()) + self.assertEqual(samples_to_ms(e.GetVadFrameSize(), self._sample_rate), + e.GetVadFrameSizeMs()) + + def testVoiceActivityDetectors(self): + for vad_type_value in range(0, self._ALL_VAD_TYPES + 1): + vad_type = self._VAD_TYPE_CLASS(vad_type_value) + e = annotations.AudioAnnotationsExtractor(vad_type=vad_type_value) + e.Extract(self._wav_file_path) + if vad_type.Contains(self._VAD_TYPE_CLASS.ENERGY_THRESHOLD): + # pylint: disable=unpacking-non-sequence + vad_output = e.GetVadOutput( + self._VAD_TYPE_CLASS.ENERGY_THRESHOLD) + self.assertGreater(len(vad_output), 0) + self.assertGreaterEqual( + float(np.sum(vad_output)) / len(vad_output), 0.95) + + if vad_type.Contains(self._VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO): + # pylint: disable=unpacking-non-sequence + vad_output = 
e.GetVadOutput( + self._VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO) + self.assertGreater(len(vad_output), 0) + self.assertGreaterEqual( + float(np.sum(vad_output)) / len(vad_output), 0.95) + + if vad_type.Contains(self._VAD_TYPE_CLASS.WEBRTC_APM): + # pylint: disable=unpacking-non-sequence + (vad_probs, + vad_rms) = e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_APM) + self.assertGreater(len(vad_probs), 0) + self.assertGreater(len(vad_rms), 0) + self.assertGreaterEqual( + float(np.sum(vad_probs)) / len(vad_probs), 0.5) + self.assertGreaterEqual( + float(np.sum(vad_rms)) / len(vad_rms), 20000) + + if self._DEBUG_PLOT_VAD: + frame_times_s = lambda num_frames, frame_size_ms: np.arange( + num_frames).astype(np.float32) * frame_size_ms / 1000.0 + level = e.GetLevel() + t_level = frame_times_s(num_frames=len(level), + frame_size_ms=e.GetLevelFrameSizeMs()) + t_vad = frame_times_s(num_frames=len(vad_output), + frame_size_ms=e.GetVadFrameSizeMs()) + import matplotlib.pyplot as plt + plt.figure() + plt.hold(True) + plt.plot(t_level, level) + plt.plot(t_vad, vad_output * np.max(level), '.') + plt.show() + + def testSaveLoad(self): + e = annotations.AudioAnnotationsExtractor(self._ALL_VAD_TYPES) + e.Extract(self._wav_file_path) + e.Save(self._tmp_path, "fake-annotation") + + data = np.load( + os.path.join( + self._tmp_path, + e.GetOutputFileNameTemplate().format("fake-annotation"))) + np.testing.assert_array_equal(e.GetLevel(), data['level']) + self.assertEqual(np.float32, data['level'].dtype) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.ENERGY_THRESHOLD), + data['vad_energy_output']) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO), + data['vad_output']) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_APM)[0], + data['vad_probs']) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_APM)[1], + data['vad_rms']) + self.assertEqual(np.uint8, data['vad_energy_output'].dtype) + self.assertEqual(np.float64, data['vad_probs'].dtype) + self.assertEqual(np.float64, data['vad_rms'].dtype) + + def testEmptyExternalShouldNotCrash(self): + for vad_type_value in range(0, self._ALL_VAD_TYPES + 1): + annotations.AudioAnnotationsExtractor(vad_type_value, {}) + + def testFakeExternalSaveLoad(self): + def FakeExternalFactory(): + return external_vad.ExternalVad( + os.path.join(os.path.dirname(os.path.abspath(__file__)), + 'fake_external_vad.py'), 'fake') + + for vad_type_value in range(0, self._ALL_VAD_TYPES + 1): + e = annotations.AudioAnnotationsExtractor( + vad_type_value, {'fake': FakeExternalFactory()}) + e.Extract(self._wav_file_path) + e.Save(self._tmp_path, annotation_name="fake-annotation") + data = np.load( + os.path.join( + self._tmp_path, + e.GetOutputFileNameTemplate().format("fake-annotation"))) + self.assertEqual(np.float32, data['extvad_conf-fake'].dtype) + np.testing.assert_almost_equal(np.arange(100, dtype=np.float32), + data['extvad_conf-fake']) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json new file mode 100644 index 0000000000..5c3277bac0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json @@ -0,0 +1 @@ +{"-all_default": null} diff --git 
a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc
new file mode 100644
index 0000000000..73ce4ed3f7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#include <array>
+#include <fstream>
+#include <string>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+#include "rtc_base/logging.h"
+
+ABSL_FLAG(std::string, i, "", "Input wav file");
+ABSL_FLAG(std::string, o_probs, "", "VAD probabilities output file");
+ABSL_FLAG(std::string, o_rms, "", "VAD output file");
+
+namespace webrtc {
+namespace test {
+namespace {
+
+constexpr uint8_t kAudioFrameLengthMilliseconds = 10;
+constexpr int kMaxSampleRate = 48000;
+constexpr size_t kMaxFrameLen =
+    kAudioFrameLengthMilliseconds * kMaxSampleRate / 1000;
+
+int main(int argc, char* argv[]) {
+  absl::ParseCommandLine(argc, argv);
+  const std::string input_file = absl::GetFlag(FLAGS_i);
+  const std::string output_probs_file = absl::GetFlag(FLAGS_o_probs);
+  const std::string output_file = absl::GetFlag(FLAGS_o_rms);
+  // Open wav input file and check properties.
+  WavReader wav_reader(input_file);
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() > kMaxSampleRate) {
+    RTC_LOG(LS_ERROR) << "Beyond maximum sample rate (" << kMaxSampleRate
+                      << ")";
+    return 1;
+  }
+  const size_t audio_frame_len = rtc::CheckedDivExact(
+      kAudioFrameLengthMilliseconds * wav_reader.sample_rate(), 1000);
+  if (audio_frame_len > kMaxFrameLen) {
+    RTC_LOG(LS_ERROR) << "The frame size and/or the sample rate are too large.";
+    return 1;
+  }
+
+  // Create output file and write header.
+  std::ofstream out_probs_file(output_probs_file, std::ofstream::binary);
+  std::ofstream out_rms_file(output_file, std::ofstream::binary);
+
+  // Run VAD and write decisions.
+  VoiceActivityDetector vad;
+  std::array<int16_t, kMaxFrameLen> samples;
+
+  while (true) {
+    // Process frame.
+    const auto read_samples =
+        wav_reader.ReadSamples(audio_frame_len, samples.data());
+    if (read_samples < audio_frame_len) {
+      break;
+    }
+    vad.ProcessChunk(samples.data(), audio_frame_len, wav_reader.sample_rate());
+    // Write output.
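+    // Each value is dumped as a raw 8-byte double; annotations.py reads the
+    // files back with np.fromfile(..., np.double).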
+    auto probs = vad.chunkwise_voice_probabilities();
+    auto rms = vad.chunkwise_rms();
+    RTC_CHECK_EQ(probs.size(), rms.size());
+    RTC_CHECK_EQ(sizeof(double), 8);
+
+    for (const auto& p : probs) {
+      out_probs_file.write(reinterpret_cast<const char*>(&p), 8);
+    }
+    for (const auto& r : rms) {
+      out_rms_file.write(reinterpret_cast<const char*>(&r), 8);
+    }
+  }
+
+  out_probs_file.close();
+  out_rms_file.close();
+  return 0;
+}
+
+}  // namespace
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py
new file mode 100644
index 0000000000..04aeaa95b9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Class implementing a wrapper for APM simulators.
+"""
+
+import cProfile
+import logging
+import os
+import subprocess
+
+from . import data_access
+from . import exceptions
+
+
+class AudioProcWrapper(object):
+    """Wrapper for APM simulators.
+    """
+
+    DEFAULT_APM_SIMULATOR_BIN_PATH = os.path.abspath(
+        os.path.join(os.pardir, 'audioproc_f'))
+    OUTPUT_FILENAME = 'output.wav'
+
+    def __init__(self, simulator_bin_path):
+        """Ctor.
+
+        Args:
+          simulator_bin_path: path to the APM simulator binary.
+        """
+        self._simulator_bin_path = simulator_bin_path
+        self._config = None
+        self._output_signal_filepath = None
+
+        # Profiler instance to measure running time.
+        self._profiler = cProfile.Profile()
+
+    @property
+    def output_filepath(self):
+        return self._output_signal_filepath
+
+    def Run(self,
+            config_filepath,
+            capture_input_filepath,
+            output_path,
+            render_input_filepath=None):
+        """Runs the APM simulator.
+
+        Args:
+          config_filepath: path to the configuration file specifying the
+                           arguments for the APM simulator.
+          capture_input_filepath: path to the capture audio track input file
+                                  (aka forward or near-end).
+          output_path: path of the audio track output file.
+          render_input_filepath: path to the render audio track input file
+                                 (aka reverse or far-end).
+        """
+        # Init.
+        self._output_signal_filepath = os.path.join(output_path,
+                                                    self.OUTPUT_FILENAME)
+        profiling_stats_filepath = os.path.join(output_path, 'profiling.stats')
+
+        # Skip if the output has already been generated.
+        if os.path.exists(self._output_signal_filepath) and os.path.exists(
+                profiling_stats_filepath):
+            return
+
+        # Load configuration.
+        self._config = data_access.AudioProcConfigFile.Load(config_filepath)
+
+        # Set remaining parameters.
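+        # The keys set below ('-i', '-o', '-ri') are audioproc_f command-line
+        # flags, matching the ones loaded from the JSON configuration file.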
+ if not os.path.exists(capture_input_filepath): + raise exceptions.FileNotFoundError( + 'cannot find capture input file') + self._config['-i'] = capture_input_filepath + self._config['-o'] = self._output_signal_filepath + if render_input_filepath is not None: + if not os.path.exists(render_input_filepath): + raise exceptions.FileNotFoundError( + 'cannot find render input file') + self._config['-ri'] = render_input_filepath + + # Build arguments list. + args = [self._simulator_bin_path] + for param_name in self._config: + args.append(param_name) + if self._config[param_name] is not None: + args.append(str(self._config[param_name])) + logging.debug(' '.join(args)) + + # Run. + self._profiler.enable() + subprocess.call(args) + self._profiler.disable() + + # Save profiling stats. + self._profiler.dump_stats(profiling_stats_filepath) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py new file mode 100644 index 0000000000..38aac0cbe2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py @@ -0,0 +1,243 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Imports a filtered subset of the scores and configurations computed +by apm_quality_assessment.py into a pandas data frame. +""" + +import argparse +import glob +import logging +import os +import re +import sys + +try: + import pandas as pd +except ImportError: + logging.critical('Cannot import the third-party Python package pandas') + sys.exit(1) + +from . import data_access as data_access +from . import simulation as sim + +# Compiled regular expressions used to extract score descriptors. +RE_CONFIG_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixApmConfig() + + r'(.+)') +RE_CAPTURE_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixCapture() + + r'(.+)') +RE_RENDER_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixRender() + r'(.+)') +RE_ECHO_SIM_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixEchoSimulator() + + r'(.+)') +RE_TEST_DATA_GEN_NAME = re.compile( + sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + r'(.+)') +RE_TEST_DATA_GEN_PARAMS = re.compile( + sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + r'(.+)') +RE_SCORE_NAME = re.compile(sim.ApmModuleSimulator.GetPrefixScore() + + r'(.+)(\..+)') + + +def InstanceArgumentsParser(): + """Arguments parser factory. 
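+
+    The returned parser is shared by the apm_quality_assessment_* scripts,
+    which override `parser.description` and add their own arguments.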
+ """ + parser = argparse.ArgumentParser( + description=('Override this description in a user script by changing' + ' `parser.description` of the returned parser.')) + + parser.add_argument('-o', + '--output_dir', + required=True, + help=('the same base path used with the ' + 'apm_quality_assessment tool')) + + parser.add_argument( + '-c', + '--config_names', + type=re.compile, + help=('regular expression to filter the APM configuration' + ' names')) + + parser.add_argument( + '-i', + '--capture_names', + type=re.compile, + help=('regular expression to filter the capture signal ' + 'names')) + + parser.add_argument('-r', + '--render_names', + type=re.compile, + help=('regular expression to filter the render signal ' + 'names')) + + parser.add_argument( + '-e', + '--echo_simulator_names', + type=re.compile, + help=('regular expression to filter the echo simulator ' + 'names')) + + parser.add_argument('-t', + '--test_data_generators', + type=re.compile, + help=('regular expression to filter the test data ' + 'generator names')) + + parser.add_argument( + '-s', + '--eval_scores', + type=re.compile, + help=('regular expression to filter the evaluation score ' + 'names')) + + return parser + + +def _GetScoreDescriptors(score_filepath): + """Extracts a score descriptor from the given score file path. + + Args: + score_filepath: path to the score file. + + Returns: + A tuple of strings (APM configuration name, capture audio track name, + render audio track name, echo simulator name, test data generator name, + test data generator parameters as string, evaluation score name). + """ + fields = score_filepath.split(os.sep)[-7:] + extract_name = lambda index, reg_expr: (reg_expr.match(fields[index]). + groups(0)[0]) + return ( + extract_name(0, RE_CONFIG_NAME), + extract_name(1, RE_CAPTURE_NAME), + extract_name(2, RE_RENDER_NAME), + extract_name(3, RE_ECHO_SIM_NAME), + extract_name(4, RE_TEST_DATA_GEN_NAME), + extract_name(5, RE_TEST_DATA_GEN_PARAMS), + extract_name(6, RE_SCORE_NAME), + ) + + +def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name, + test_data_gen_name, score_name, args): + """Decides whether excluding a score. + + A set of optional regular expressions in args is used to determine if the + score should be excluded (depending on its |*_name| descriptors). + + Args: + config_name: APM configuration name. + capture_name: capture audio track name. + render_name: render audio track name. + echo_simulator_name: echo simulator name. + test_data_gen_name: test data generator name. + score_name: evaluation score name. + args: parsed arguments. + + Returns: + A boolean. + """ + value_regexpr_pairs = [ + (config_name, args.config_names), + (capture_name, args.capture_names), + (render_name, args.render_names), + (echo_simulator_name, args.echo_simulator_names), + (test_data_gen_name, args.test_data_generators), + (score_name, args.eval_scores), + ] + + # Score accepted if each value matches the corresponding regular expression. + for value, regexpr in value_regexpr_pairs: + if regexpr is None: + continue + if not regexpr.match(value): + return True + + return False + + +def FindScores(src_path, args): + """Given a search path, find scores and return a DataFrame object. + + Args: + src_path: Search path pattern. + args: parsed arguments. + + Returns: + A DataFrame object. + """ + # Get scores. + scores = [] + for score_filepath in glob.iglob(src_path): + # Extract score descriptor fields from the path. 
+        (config_name, capture_name, render_name, echo_simulator_name,
+         test_data_gen_name, test_data_gen_params,
+         score_name) = _GetScoreDescriptors(score_filepath)
+
+        # Ignore the score if required.
+        if _ExcludeScore(config_name, capture_name, render_name,
+                         echo_simulator_name, test_data_gen_name, score_name,
+                         args):
+            logging.info('ignored score: %s %s %s %s %s %s', config_name,
+                         capture_name, render_name, echo_simulator_name,
+                         test_data_gen_name, score_name)
+            continue
+
+        # Read metadata and score.
+        metadata = data_access.Metadata.LoadAudioTestDataPaths(
+            os.path.split(score_filepath)[0])
+        score = data_access.ScoreFile.Load(score_filepath)
+
+        # Add a score with its descriptor fields.
+        scores.append((
+            metadata['clean_capture_input_filepath'],
+            metadata['echo_free_capture_filepath'],
+            metadata['echo_filepath'],
+            metadata['render_filepath'],
+            metadata['capture_filepath'],
+            metadata['apm_output_filepath'],
+            metadata['apm_reference_filepath'],
+            config_name,
+            capture_name,
+            render_name,
+            echo_simulator_name,
+            test_data_gen_name,
+            test_data_gen_params,
+            score_name,
+            score,
+        ))
+
+    return pd.DataFrame(data=scores,
+                        columns=(
+                            'clean_capture_input_filepath',
+                            'echo_free_capture_filepath',
+                            'echo_filepath',
+                            'render_filepath',
+                            'capture_filepath',
+                            'apm_output_filepath',
+                            'apm_reference_filepath',
+                            'apm_config',
+                            'capture',
+                            'render',
+                            'echo_simulator',
+                            'test_data_gen',
+                            'test_data_gen_params',
+                            'eval_score_name',
+                            'score',
+                        ))
+
+
+def ConstructSrcPath(args):
+    return os.path.join(
+        args.output_dir,
+        sim.ApmModuleSimulator.GetPrefixApmConfig() + '*',
+        sim.ApmModuleSimulator.GetPrefixCapture() + '*',
+        sim.ApmModuleSimulator.GetPrefixRender() + '*',
+        sim.ApmModuleSimulator.GetPrefixEchoSimulator() + '*',
+        sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + '*',
+        sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + '*',
+        sim.ApmModuleSimulator.GetPrefixScore() + '*')
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py
new file mode 100644
index 0000000000..c1aebb67f1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py
@@ -0,0 +1,154 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Data access utility functions and classes.
+"""
+
+import json
+import os
+
+
+def MakeDirectory(path):
+    """Makes a directory recursively without raising exceptions if existing.
+
+    Args:
+      path: path to the directory to be created.
+    """
+    if os.path.exists(path):
+        return
+    os.makedirs(path)
+
+
+class Metadata(object):
+    """Data access class to save and load metadata.
+    """
+
+    def __init__(self):
+        pass
+
+    _GENERIC_METADATA_SUFFIX = '.mdata'
+    _AUDIO_TEST_DATA_FILENAME = 'audio_test_data.json'
+
+    @classmethod
+    def LoadFileMetadata(cls, filepath):
+        """Loads generic metadata linked to a file.
+
+        Args:
+          filepath: path to the metadata file to read.
+
+        Returns:
+          A dict.
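+
+        The metadata is read from `filepath` + '.mdata' (JSON-encoded).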
+ """ + with open(filepath + cls._GENERIC_METADATA_SUFFIX) as f: + return json.load(f) + + @classmethod + def SaveFileMetadata(cls, filepath, metadata): + """Saves generic metadata linked to a file. + + Args: + filepath: path to the metadata file to write. + metadata: a dict. + """ + with open(filepath + cls._GENERIC_METADATA_SUFFIX, 'w') as f: + json.dump(metadata, f) + + @classmethod + def LoadAudioTestDataPaths(cls, metadata_path): + """Loads the input and the reference audio track paths. + + Args: + metadata_path: path to the directory containing the metadata file. + + Returns: + Tuple with the paths to the input and output audio tracks. + """ + metadata_filepath = os.path.join(metadata_path, + cls._AUDIO_TEST_DATA_FILENAME) + with open(metadata_filepath) as f: + return json.load(f) + + @classmethod + def SaveAudioTestDataPaths(cls, output_path, **filepaths): + """Saves the input and the reference audio track paths. + + Args: + output_path: path to the directory containing the metadata file. + + Keyword Args: + filepaths: collection of audio track file paths to save. + """ + output_filepath = os.path.join(output_path, + cls._AUDIO_TEST_DATA_FILENAME) + with open(output_filepath, 'w') as f: + json.dump(filepaths, f) + + +class AudioProcConfigFile(object): + """Data access to load/save APM simulator argument lists. + + The arguments stored in the config files are used to control the APM flags. + """ + + def __init__(self): + pass + + @classmethod + def Load(cls, filepath): + """Loads a configuration file for an APM simulator. + + Args: + filepath: path to the configuration file. + + Returns: + A dict containing the configuration. + """ + with open(filepath) as f: + return json.load(f) + + @classmethod + def Save(cls, filepath, config): + """Saves a configuration file for an APM simulator. + + Args: + filepath: path to the configuration file. + config: a dict containing the configuration. + """ + with open(filepath, 'w') as f: + json.dump(config, f) + + +class ScoreFile(object): + """Data access class to save and load float scalar scores. + """ + + def __init__(self): + pass + + @classmethod + def Load(cls, filepath): + """Loads a score from file. + + Args: + filepath: path to the score file. + + Returns: + A float encoding the score. + """ + with open(filepath) as f: + return float(f.readline().strip()) + + @classmethod + def Save(cls, filepath, score): + """Saves a score into a file. + + Args: + filepath: path to the score file. + score: float encoding the score. + """ + with open(filepath, 'w') as f: + f.write('{0:f}\n'.format(score)) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py new file mode 100644 index 0000000000..65903ea32d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py @@ -0,0 +1,136 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Echo path simulation module. +""" + +import hashlib +import os + +from . 
import signal_processing + + +class EchoPathSimulator(object): + """Abstract class for the echo path simulators. + + In general, an echo path simulator is a function of the render signal and + simulates the propagation of the latter into the microphone (e.g., due to + mechanical or electrical paths). + """ + + NAME = None + REGISTERED_CLASSES = {} + + def __init__(self): + pass + + def Simulate(self, output_path): + """Creates the echo signal and stores it in an audio file (abstract method). + + Args: + output_path: Path in which any output can be saved. + + Returns: + Path to the generated audio track file or None if no echo is present. + """ + raise NotImplementedError() + + @classmethod + def RegisterClass(cls, class_to_register): + """Registers an EchoPathSimulator implementation. + + Decorator to automatically register the classes that extend + EchoPathSimulator. + Example usage: + + @EchoPathSimulator.RegisterClass + class NoEchoPathSimulator(EchoPathSimulator): + pass + """ + cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register + return class_to_register + + +@EchoPathSimulator.RegisterClass +class NoEchoPathSimulator(EchoPathSimulator): + """Simulates absence of echo.""" + + NAME = 'noecho' + + def __init__(self): + EchoPathSimulator.__init__(self) + + def Simulate(self, output_path): + return None + + +@EchoPathSimulator.RegisterClass +class LinearEchoPathSimulator(EchoPathSimulator): + """Simulates linear echo path. + + This class applies a given impulse response to the render input and then it + sums the signal to the capture input signal. + """ + + NAME = 'linear' + + def __init__(self, render_input_filepath, impulse_response): + """ + Args: + render_input_filepath: Render audio track file. + impulse_response: list or numpy vector of float values. + """ + EchoPathSimulator.__init__(self) + self._render_input_filepath = render_input_filepath + self._impulse_response = impulse_response + + def Simulate(self, output_path): + """Simulates linear echo path.""" + # Form the file name with a hash of the impulse response. + impulse_response_hash = hashlib.sha256( + str(self._impulse_response).encode('utf-8', 'ignore')).hexdigest() + echo_filepath = os.path.join( + output_path, 'linear_echo_{}.wav'.format(impulse_response_hash)) + + # If the simulated echo audio track file does not exists, create it. + if not os.path.exists(echo_filepath): + render = signal_processing.SignalProcessingUtils.LoadWav( + self._render_input_filepath) + echo = signal_processing.SignalProcessingUtils.ApplyImpulseResponse( + render, self._impulse_response) + signal_processing.SignalProcessingUtils.SaveWav( + echo_filepath, echo) + + return echo_filepath + + +@EchoPathSimulator.RegisterClass +class RecordedEchoPathSimulator(EchoPathSimulator): + """Uses recorded echo. + + This class uses the clean capture input file name to build the file name of + the corresponding recording containing echo (a predefined suffix is used). + Such a file is expected to be already existing. 
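+
+    For instance (hypothetical name), for the render input `speech.wav` the
+    recording `speech_echo.wav` is expected to exist next to it.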
+ """ + + NAME = 'recorded' + + _FILE_NAME_SUFFIX = '_echo' + + def __init__(self, render_input_filepath): + EchoPathSimulator.__init__(self) + self._render_input_filepath = render_input_filepath + + def Simulate(self, output_path): + """Uses recorded echo path.""" + path, file_name_ext = os.path.split(self._render_input_filepath) + file_name, file_ext = os.path.splitext(file_name_ext) + echo_filepath = os.path.join( + path, '{}{}{}'.format(file_name, self._FILE_NAME_SUFFIX, file_ext)) + assert os.path.exists(echo_filepath), ( + 'cannot find the echo audio track file {}'.format(echo_filepath)) + return echo_filepath diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py new file mode 100644 index 0000000000..4b46b36b47 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py @@ -0,0 +1,48 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Echo path simulation factory module. +""" + +import numpy as np + +from . import echo_path_simulation + + +class EchoPathSimulatorFactory(object): + + # TODO(alessiob): Replace 20 ms delay (at 48 kHz sample rate) with a more + # realistic impulse response. + _LINEAR_ECHO_IMPULSE_RESPONSE = np.array([0.0] * (20 * 48) + [0.15]) + + def __init__(self): + pass + + @classmethod + def GetInstance(cls, echo_path_simulator_class, render_input_filepath): + """Creates an EchoPathSimulator instance given a class object. + + Args: + echo_path_simulator_class: EchoPathSimulator class object (not an + instance). + render_input_filepath: Path to the render audio track file. + + Returns: + An EchoPathSimulator instance. + """ + assert render_input_filepath is not None or ( + echo_path_simulator_class == + echo_path_simulation.NoEchoPathSimulator) + + if echo_path_simulator_class == echo_path_simulation.NoEchoPathSimulator: + return echo_path_simulation.NoEchoPathSimulator() + elif echo_path_simulator_class == ( + echo_path_simulation.LinearEchoPathSimulator): + return echo_path_simulation.LinearEchoPathSimulator( + render_input_filepath, cls._LINEAR_ECHO_IMPULSE_RESPONSE) + else: + return echo_path_simulator_class(render_input_filepath) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py new file mode 100644 index 0000000000..b6cc8abdde --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py @@ -0,0 +1,82 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. 
All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Unit tests for the echo path simulation module.
+"""
+
+import shutil
+import os
+import tempfile
+import unittest
+
+import pydub
+
+from . import echo_path_simulation
+from . import echo_path_simulation_factory
+from . import signal_processing
+
+
+class TestEchoPathSimulators(unittest.TestCase):
+    """Unit tests for the echo_path_simulation module.
+    """
+
+    def setUp(self):
+        """Creates temporary data."""
+        self._tmp_path = tempfile.mkdtemp()
+
+        # Create and save white noise.
+        silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000)
+        white_noise = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+            silence)
+        self._audio_track_num_samples = (
+            signal_processing.SignalProcessingUtils.CountSamples(white_noise))
+        self._audio_track_filepath = os.path.join(self._tmp_path,
+                                                  'white_noise.wav')
+        signal_processing.SignalProcessingUtils.SaveWav(
+            self._audio_track_filepath, white_noise)
+
+        # Make a copy of the white noise audio track file; it will be used by
+        # echo_path_simulation.RecordedEchoPathSimulator.
+        shutil.copy(self._audio_track_filepath,
+                    os.path.join(self._tmp_path, 'white_noise_echo.wav'))
+
+    def tearDown(self):
+        """Recursively deletes temporary folders."""
+        shutil.rmtree(self._tmp_path)
+
+    def testRegisteredClasses(self):
+        # Check that there is at least one registered echo path simulator.
+        registered_classes = (
+            echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES)
+        self.assertIsInstance(registered_classes, dict)
+        self.assertGreater(len(registered_classes), 0)
+
+        # Instance factory.
+        factory = echo_path_simulation_factory.EchoPathSimulatorFactory()
+
+        # Try each registered echo path simulator.
+        for echo_path_simulator_name in registered_classes:
+            simulator = factory.GetInstance(
+                echo_path_simulator_class=registered_classes[
+                    echo_path_simulator_name],
+                render_input_filepath=self._audio_track_filepath)
+
+            echo_filepath = simulator.Simulate(self._tmp_path)
+            if echo_filepath is None:
+                self.assertEqual(echo_path_simulation.NoEchoPathSimulator.NAME,
+                                 echo_path_simulator_name)
+                # No other tests in this case.
+                continue
+
+            # Check that the echo audio track file exists and its length is
+            # greater than or equal to that of the render audio track.
+            self.assertTrue(os.path.exists(echo_filepath))
+            echo = signal_processing.SignalProcessingUtils.LoadWav(
+                echo_filepath)
+            self.assertGreaterEqual(
+                signal_processing.SignalProcessingUtils.CountSamples(echo),
+                self._audio_track_num_samples)
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py
new file mode 100644
index 0000000000..59c5f74be4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py
@@ -0,0 +1,427 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Evaluation score abstract class and implementations.
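+
+Each score is stored in a small text file via data_access.ScoreFile, so that
+previously computed scores can be reloaded instead of recomputed (see
+EvaluationScore.Run()).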
+""" + +from __future__ import division +import logging +import os +import re +import subprocess +import sys + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +from . import data_access +from . import exceptions +from . import signal_processing + + +class EvaluationScore(object): + + NAME = None + REGISTERED_CLASSES = {} + + def __init__(self, score_filename_prefix): + self._score_filename_prefix = score_filename_prefix + self._input_signal_metadata = None + self._reference_signal = None + self._reference_signal_filepath = None + self._tested_signal = None + self._tested_signal_filepath = None + self._output_filepath = None + self._score = None + self._render_signal_filepath = None + + @classmethod + def RegisterClass(cls, class_to_register): + """Registers an EvaluationScore implementation. + + Decorator to automatically register the classes that extend EvaluationScore. + Example usage: + + @EvaluationScore.RegisterClass + class AudioLevelScore(EvaluationScore): + pass + """ + cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register + return class_to_register + + @property + def output_filepath(self): + return self._output_filepath + + @property + def score(self): + return self._score + + def SetInputSignalMetadata(self, metadata): + """Sets input signal metadata. + + Args: + metadata: dict instance. + """ + self._input_signal_metadata = metadata + + def SetReferenceSignalFilepath(self, filepath): + """Sets the path to the audio track used as reference signal. + + Args: + filepath: path to the reference audio track. + """ + self._reference_signal_filepath = filepath + + def SetTestedSignalFilepath(self, filepath): + """Sets the path to the audio track used as test signal. + + Args: + filepath: path to the test audio track. + """ + self._tested_signal_filepath = filepath + + def SetRenderSignalFilepath(self, filepath): + """Sets the path to the audio track used as render signal. + + Args: + filepath: path to the test audio track. + """ + self._render_signal_filepath = filepath + + def Run(self, output_path): + """Extracts the score for the set test data pair. + + Args: + output_path: path to the directory where the output is written. + """ + self._output_filepath = os.path.join( + output_path, self._score_filename_prefix + self.NAME + '.txt') + try: + # If the score has already been computed, load. + self._LoadScore() + logging.debug('score found and loaded') + except IOError: + # Compute the score. + logging.debug('score not found, compute') + self._Run(output_path) + + def _Run(self, output_path): + # Abstract method. + raise NotImplementedError() + + def _LoadReferenceSignal(self): + assert self._reference_signal_filepath is not None + self._reference_signal = signal_processing.SignalProcessingUtils.LoadWav( + self._reference_signal_filepath) + + def _LoadTestedSignal(self): + assert self._tested_signal_filepath is not None + self._tested_signal = signal_processing.SignalProcessingUtils.LoadWav( + self._tested_signal_filepath) + + def _LoadScore(self): + return data_access.ScoreFile.Load(self._output_filepath) + + def _SaveScore(self): + return data_access.ScoreFile.Save(self._output_filepath, self._score) + + +@EvaluationScore.RegisterClass +class AudioLevelPeakScore(EvaluationScore): + """Peak audio level score. + + Defined as the difference between the peak audio level of the tested and + the reference signals. 
+        for t in range(seconds):
+            t0 = t * 1000
+            t1 = t0 + 1000
+            dbfs_diffs_sum += (self._tested_signal[t0:t1].dBFS -
+                               self._reference_signal[t0:t1].dBFS)
+        self._score = dbfs_diffs_sum / float(seconds)
+        self._SaveScore()
+
+
+@EvaluationScore.RegisterClass
+class EchoMetric(EvaluationScore):
+    """Echo score.
+
+    Proportion of detected echo.
+
+    Unit: ratio
+    Ideal: 0
+    Worst case: 1
+    """
+
+    NAME = 'echo_metric'
+
+    def __init__(self, score_filename_prefix, echo_detector_bin_filepath):
+        EvaluationScore.__init__(self, score_filename_prefix)
+
+        # Echo detector binary file path.
+        self._echo_detector_bin_filepath = echo_detector_bin_filepath
+        if not os.path.exists(self._echo_detector_bin_filepath):
+            logging.error('cannot find EchoMetric tool binary file')
+            raise exceptions.FileNotFoundError()
+
+        self._echo_detector_bin_path, _ = os.path.split(
+            self._echo_detector_bin_filepath)
+
+    def _Run(self, output_path):
+        echo_detector_out_filepath = os.path.join(output_path,
+                                                  'echo_detector.out')
+        if os.path.exists(echo_detector_out_filepath):
+            os.unlink(echo_detector_out_filepath)
+
+        logging.debug("Render signal filepath: %s",
+                      self._render_signal_filepath)
+        if not os.path.exists(self._render_signal_filepath):
+            logging.error(
+                "Render input required for evaluating the echo metric.")
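+        # Invoke the echo detector; it is expected to write the score, a
+        # single float in [0, 1], to the file passed via --output_file (see
+        # _ParseOutputFile below).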
+        args = [
+            self._echo_detector_bin_filepath, '--output_file',
+            echo_detector_out_filepath, '--', '-i',
+            self._tested_signal_filepath, '-ri', self._render_signal_filepath
+        ]
+        logging.debug(' '.join(args))
+        subprocess.call(args, cwd=self._echo_detector_bin_path)
+
+        # Parse the echo detector tool output and extract the score.
+        self._score = self._ParseOutputFile(echo_detector_out_filepath)
+        self._SaveScore()
+
+    @classmethod
+    def _ParseOutputFile(cls, echo_metric_file_path):
+        """
+        Parses the echo detector tool output, a single float value.
+
+        Args:
+          echo_metric_file_path: path to the echo detector output file.
+
+        Returns:
+          The score as a number in [0, 1].
+        """
+        with open(echo_metric_file_path) as f:
+            return float(f.read())
+
+
+@EvaluationScore.RegisterClass
+class PolqaScore(EvaluationScore):
+    """POLQA score.
+
+    See http://www.polqa.info/.
+
+    Unit: MOS
+    Ideal: 4.5
+    Worst case: 1.0
+    """
+
+    NAME = 'polqa'
+
+    def __init__(self, score_filename_prefix, polqa_bin_filepath):
+        EvaluationScore.__init__(self, score_filename_prefix)
+
+        # POLQA binary file path.
+        self._polqa_bin_filepath = polqa_bin_filepath
+        if not os.path.exists(self._polqa_bin_filepath):
+            logging.error('cannot find POLQA tool binary file')
+            raise exceptions.FileNotFoundError()
+
+        # Path to the POLQA directory with binary and license files.
+        self._polqa_tool_path, _ = os.path.split(self._polqa_bin_filepath)
+
+    def _Run(self, output_path):
+        polqa_out_filepath = os.path.join(output_path, 'polqa.out')
+        if os.path.exists(polqa_out_filepath):
+            os.unlink(polqa_out_filepath)
+
+        args = [
+            self._polqa_bin_filepath,
+            '-t',
+            '-q',
+            '-Overwrite',
+            '-Ref',
+            self._reference_signal_filepath,
+            '-Test',
+            self._tested_signal_filepath,
+            '-LC',
+            'NB',
+            '-Out',
+            polqa_out_filepath,
+        ]
+        logging.debug(' '.join(args))
+        subprocess.call(args, cwd=self._polqa_tool_path)
+
+        # Parse POLQA tool output and extract the score.
+        polqa_output = self._ParseOutputFile(polqa_out_filepath)
+        self._score = float(polqa_output['PolqaScore'])
+
+        self._SaveScore()
+
+    @classmethod
+    def _ParseOutputFile(cls, polqa_out_filepath):
+        """
+        Parses the POLQA tool output formatted as a table ('-t' option).
+
+        Args:
+          polqa_out_filepath: path to the POLQA tool output file.
+
+        Returns:
+          A dict.
+        """
+        data = []
+        with open(polqa_out_filepath) as f:
+            for line in f:
+                line = line.strip()
+                if len(line) == 0 or line.startswith('*'):
+                    # Ignore comments.
+                    continue
+                # Read fields.
+                data.append(re.split(r'\t+', line))
+
+        # Two rows expected (header and values).
+        assert len(data) == 2, 'Cannot parse POLQA output'
+        number_of_fields = len(data[0])
+        assert number_of_fields == len(data[1])
+
+        # Build and return a dictionary with field names (header) as keys and
+        # the corresponding field values as values.
+        return {
+            data[0][index]: data[1][index]
+            for index in range(number_of_fields)
+        }
+
+
+@EvaluationScore.RegisterClass
+class TotalHarmonicDistorsionScore(EvaluationScore):
+    """Total harmonic distortion plus noise score.
+
+    See "https://en.wikipedia.org/wiki/Total_harmonic_distortion#THD.2BN".
+
+    Unit: -.
+    Ideal: 0.
+    Worst case: +inf
+    """
+
+    NAME = 'thd'
+
+    def __init__(self, score_filename_prefix):
+        EvaluationScore.__init__(self, score_filename_prefix)
+        self._input_frequency = None
+
+    def _Run(self, output_path):
+        self._CheckInputSignal()
+
+        self._LoadTestedSignal()
+        if self._tested_signal.channels != 1:
+            raise exceptions.EvaluationScoreException(
+                'unsupported number of channels')
+        samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData(
+            self._tested_signal)
+
+        # Init.
+        num_samples = len(samples)
+        duration = len(self._tested_signal) / 1000.0
+        scaling = 2.0 / num_samples
+        max_freq = self._tested_signal.frame_rate / 2
+        f0_freq = float(self._input_frequency)
+        t = np.linspace(0, duration, num_samples)
+
+        # Analyze harmonics.
+        b_terms = []
+        n = 1
+        while f0_freq * n < max_freq:
+            x_n = np.sum(
+                samples * np.sin(2.0 * np.pi * n * f0_freq * t)) * scaling
+            y_n = np.sum(
+                samples * np.cos(2.0 * np.pi * n * f0_freq * t)) * scaling
+            b_terms.append(np.sqrt(x_n**2 + y_n**2))
+            n += 1
+
+        output_without_fundamental = samples - b_terms[0] * np.sin(
+            2.0 * np.pi * f0_freq * t)
+        distortion_and_noise = np.sqrt(
+            np.sum(output_without_fundamental**2) * np.pi * scaling)
+
+        # TODO(alessiob): Fix or remove if not needed.
+        # thd = np.sqrt(np.sum(b_terms[1:]**2)) / b_terms[0]
+
+        # TODO(alessiob): Check the range of `thd_plus_noise` and update the
+        # class docstring above accordingly.
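+        # b_terms[n-1] is the magnitude of the n-th harmonic, estimated above
+        # by correlating the signal with sine/cosine terms at n * f0 (a
+        # single-bin DFT). The score is the energy of the residual left after
+        # removing the fundamental, normalized by the fundamental magnitude,
+        # i.e. a THD+N-style ratio.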
+        thd_plus_noise = distortion_and_noise / b_terms[0]
+
+        self._score = thd_plus_noise
+        self._SaveScore()
+
+    def _CheckInputSignal(self):
+        # Check input signal and get properties.
+        try:
+            if self._input_signal_metadata['signal'] != 'pure_tone':
+                raise exceptions.EvaluationScoreException(
+                    'The THD score requires a pure tone as input signal')
+            self._input_frequency = self._input_signal_metadata['frequency']
+            if self._input_signal_metadata[
+                    'test_data_gen_name'] != 'identity' or (
+                        self._input_signal_metadata['test_data_gen_config'] !=
+                        'default'):
+                raise exceptions.EvaluationScoreException(
+                    'The THD score cannot be used with any test data generator '
+                    'other than "identity"')
+        except TypeError:
+            raise exceptions.EvaluationScoreException(
+                'The THD score requires an input signal with associated metadata'
+            )
+        except KeyError:
+            raise exceptions.EvaluationScoreException(
+                'Invalid input signal metadata to compute the THD score')
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py
new file mode 100644
index 0000000000..5749a8924b
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""EvaluationScore factory class.
+"""
+
+import logging
+
+from . import exceptions
+from . import eval_scores
+
+
+class EvaluationScoreWorkerFactory(object):
+    """Factory class used to instantiate evaluation score workers.
+
+    The ctor gets the parameters that are used to instantiate the evaluation
+    score workers.
+    """
+
+    def __init__(self, polqa_tool_bin_path, echo_metric_tool_bin_path):
+        self._score_filename_prefix = None
+        self._polqa_tool_bin_path = polqa_tool_bin_path
+        self._echo_metric_tool_bin_path = echo_metric_tool_bin_path
+
+    def SetScoreFilenamePrefix(self, prefix):
+        self._score_filename_prefix = prefix
+
+    def GetInstance(self, evaluation_score_class):
+        """Creates an EvaluationScore instance given a class object.
+
+        Args:
+          evaluation_score_class: EvaluationScore class object (not an
+            instance).
+
+        Returns:
+          An EvaluationScore instance.
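+
+        Example usage (sketch; the ctor paths below are placeholders):
+
+          factory = EvaluationScoreWorkerFactory(polqa_path, echo_metric_path)
+          factory.SetScoreFilenamePrefix('scores-')
+          worker = factory.GetInstance(eval_scores.PolqaScore)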
+ """ + if self._score_filename_prefix is None: + raise exceptions.InitializationException( + 'The score file name prefix for evaluation score workers is not set' + ) + logging.debug('factory producing a %s evaluation score', + evaluation_score_class) + + if evaluation_score_class == eval_scores.PolqaScore: + return eval_scores.PolqaScore(self._score_filename_prefix, + self._polqa_tool_bin_path) + elif evaluation_score_class == eval_scores.EchoMetric: + return eval_scores.EchoMetric(self._score_filename_prefix, + self._echo_metric_tool_bin_path) + else: + return evaluation_score_class(self._score_filename_prefix) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py new file mode 100644 index 0000000000..12e043320e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py @@ -0,0 +1,137 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the eval_scores module. +""" + +import os +import shutil +import tempfile +import unittest + +import pydub + +from . import data_access +from . import eval_scores +from . import eval_scores_factory +from . import signal_processing + + +class TestEvalScores(unittest.TestCase): + """Unit tests for the eval_scores module. + """ + + def setUp(self): + """Create temporary output folder and two audio track files.""" + self._output_path = tempfile.mkdtemp() + + # Create fake reference and tested (i.e., APM output) audio track files. + silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + fake_reference_signal = (signal_processing.SignalProcessingUtils. + GenerateWhiteNoise(silence)) + fake_tested_signal = (signal_processing.SignalProcessingUtils. + GenerateWhiteNoise(silence)) + + # Save fake audio tracks. + self._fake_reference_signal_filepath = os.path.join( + self._output_path, 'fake_ref.wav') + signal_processing.SignalProcessingUtils.SaveWav( + self._fake_reference_signal_filepath, fake_reference_signal) + self._fake_tested_signal_filepath = os.path.join( + self._output_path, 'fake_test.wav') + signal_processing.SignalProcessingUtils.SaveWav( + self._fake_tested_signal_filepath, fake_tested_signal) + + def tearDown(self): + """Recursively delete temporary folder.""" + shutil.rmtree(self._output_path) + + def testRegisteredClasses(self): + # Evaluation score names to exclude (tested separately). + exceptions = ['thd', 'echo_metric'] + + # Preliminary check. + self.assertTrue(os.path.exists(self._output_path)) + + # Check that there is at least one registered evaluation score worker. + registered_classes = eval_scores.EvaluationScore.REGISTERED_CLASSES + self.assertIsInstance(registered_classes, dict) + self.assertGreater(len(registered_classes), 0) + + # Instance evaluation score workers factory with fake dependencies. 
+ eval_score_workers_factory = ( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(os.path.abspath(__file__)), 'fake_polqa'), + echo_metric_tool_bin_path=None)) + eval_score_workers_factory.SetScoreFilenamePrefix('scores-') + + # Try each registered evaluation score worker. + for eval_score_name in registered_classes: + if eval_score_name in exceptions: + continue + + # Instance evaluation score worker. + eval_score_worker = eval_score_workers_factory.GetInstance( + registered_classes[eval_score_name]) + + # Set fake input metadata and reference and test file paths, then run. + eval_score_worker.SetReferenceSignalFilepath( + self._fake_reference_signal_filepath) + eval_score_worker.SetTestedSignalFilepath( + self._fake_tested_signal_filepath) + eval_score_worker.Run(self._output_path) + + # Check output. + score = data_access.ScoreFile.Load( + eval_score_worker.output_filepath) + self.assertTrue(isinstance(score, float)) + + def testTotalHarmonicDistorsionScore(self): + # Init. + pure_tone_freq = 5000.0 + eval_score_worker = eval_scores.TotalHarmonicDistorsionScore('scores-') + eval_score_worker.SetInputSignalMetadata({ + 'signal': + 'pure_tone', + 'frequency': + pure_tone_freq, + 'test_data_gen_name': + 'identity', + 'test_data_gen_config': + 'default', + }) + template = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + + # Create 3 test signals: pure tone, pure tone + white noise, white noise + # only. + pure_tone = signal_processing.SignalProcessingUtils.GeneratePureTone( + template, pure_tone_freq) + white_noise = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + template) + noisy_tone = signal_processing.SignalProcessingUtils.MixSignals( + pure_tone, white_noise) + + # Compute scores for increasingly distorted pure tone signals. + scores = [None, None, None] + for index, tested_signal in enumerate( + [pure_tone, noisy_tone, white_noise]): + # Save signal. + tmp_filepath = os.path.join(self._output_path, 'tmp_thd.wav') + signal_processing.SignalProcessingUtils.SaveWav( + tmp_filepath, tested_signal) + + # Compute score. + eval_score_worker.SetTestedSignalFilepath(tmp_filepath) + eval_score_worker.Run(self._output_path) + scores[index] = eval_score_worker.score + + # Remove output file to avoid caching. + os.remove(eval_score_worker.output_filepath) + + # Validate scores (lowest score with a pure tone). + self.assertTrue(all([scores[i + 1] > scores[i] for i in range(2)])) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py new file mode 100644 index 0000000000..2599085329 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py @@ -0,0 +1,57 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Evaluator of the APM module. +""" + +import logging + + +class ApmModuleEvaluator(object): + """APM evaluator class. 
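+
+    Runs every given evaluation score worker on a single APM output and
+    collects the scores in a dict keyed by score name (see Run below).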
+ """ + + def __init__(self): + pass + + @classmethod + def Run(cls, evaluation_score_workers, apm_input_metadata, + apm_output_filepath, reference_input_filepath, + render_input_filepath, output_path): + """Runs the evaluation. + + Iterates over the given evaluation score workers. + + Args: + evaluation_score_workers: list of EvaluationScore instances. + apm_input_metadata: dictionary with metadata of the APM input. + apm_output_filepath: path to the audio track file with the APM output. + reference_input_filepath: path to the reference audio track file. + output_path: output path. + + Returns: + A dict of evaluation score name and score pairs. + """ + # Init. + scores = {} + + for evaluation_score_worker in evaluation_score_workers: + logging.info(' computing <%s> score', + evaluation_score_worker.NAME) + evaluation_score_worker.SetInputSignalMetadata(apm_input_metadata) + evaluation_score_worker.SetReferenceSignalFilepath( + reference_input_filepath) + evaluation_score_worker.SetTestedSignalFilepath( + apm_output_filepath) + evaluation_score_worker.SetRenderSignalFilepath( + render_input_filepath) + + evaluation_score_worker.Run(output_path) + scores[ + evaluation_score_worker.NAME] = evaluation_score_worker.score + + return scores diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py new file mode 100644 index 0000000000..893901d359 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py @@ -0,0 +1,45 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Exception classes. +""" + + +class FileNotFoundError(Exception): + """File not found exception. + """ + pass + + +class SignalProcessingException(Exception): + """Signal processing exception. + """ + pass + + +class InputMixerException(Exception): + """Input mixer exception. + """ + pass + + +class InputSignalCreatorException(Exception): + """Input signal creator exception. + """ + pass + + +class EvaluationScoreException(Exception): + """Evaluation score exception. + """ + pass + + +class InitializationException(Exception): + """Initialization exception. + """ + pass diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py new file mode 100644 index 0000000000..fe3a6c7cb9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py @@ -0,0 +1,426 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+
+import functools
+import hashlib
+import logging
+import os
+import re
+import sys
+
+try:
+    import csscompressor
+except ImportError:
+    logging.critical(
+        'Cannot import the third-party Python package csscompressor')
+    sys.exit(1)
+
+try:
+    import jsmin
+except ImportError:
+    logging.critical('Cannot import the third-party Python package jsmin')
+    sys.exit(1)
+
+
+class HtmlExport(object):
+    """HTML exporter class for APM quality scores."""
+
+    _NEW_LINE = '\n'
+
+    # CSS and JS file paths.
+    _PATH = os.path.dirname(os.path.realpath(__file__))
+    _CSS_FILEPATH = os.path.join(_PATH, 'results.css')
+    _CSS_MINIFIED = True
+    _JS_FILEPATH = os.path.join(_PATH, 'results.js')
+    _JS_MINIFIED = True
+
+    def __init__(self, output_filepath):
+        self._scores_data_frame = None
+        self._output_filepath = output_filepath
+
+    def Export(self, scores_data_frame):
+        """Exports scores into an HTML file.
+
+        Args:
+          scores_data_frame: DataFrame instance.
+        """
+        self._scores_data_frame = scores_data_frame
+        html = [
+            '<html>',
+            self._BuildHeader(),
+            ('<script type="text/javascript">'
+             'window.addEventListener(\'load\', function() {'
+             'var inspector = new AudioInspector();'
+             '});'
+             '</script>'), '<body>',
+            self._BuildBody(), '</body>', '</html>'
+        ]
+        self._Save(self._output_filepath, self._NEW_LINE.join(html))
+
+    def _BuildHeader(self):
+        """Builds the <head> section of the HTML file.
+
+        The header contains the page title and either embedded or linked CSS
+        and JS files.
+
+        Returns:
+          A string with <head>...</head> HTML.
+        """
+        html = ['<head>', '<title>Results</title>']
+
+        # Add Material Design hosted libs.
+        html.append('<link rel="stylesheet" href="http://fonts.googleapis.com/'
+                    'css?family=Roboto:300,400,500,700" type="text/css">')
+        html.append(
+            '<link rel="stylesheet" href="https://fonts.googleapis.com/'
+            'icon?family=Material+Icons">')
+        html.append(
+            '<link rel="stylesheet" href="https://code.getmdl.io/1.3.0/'
+            'material.indigo-pink.min.css">')
+        html.append('<script defer src="https://code.getmdl.io/1.3.0/'
+                    'material.min.js"></script>')
+
+        # Embed custom JavaScript and CSS files.
+        html.append('<script>')
+        with open(self._JS_FILEPATH) as f:
+            html.append(
+                jsmin.jsmin(f.read()) if self._JS_MINIFIED else f.read())
+        html.append('</script>')
+        html.append('<style>')
+        with open(self._CSS_FILEPATH) as f:
+            html.append(
+                csscompressor.compress(f.read()) if self._CSS_MINIFIED else
+                f.read())
+        html.append('</style>')
+
+        html.append('</head>')
+
+        return self._NEW_LINE.join(html)
+
+    def _BuildBody(self):
+        """Builds the content of the <body> section."""
+        score_names = self._scores_data_frame[
+            'eval_score_name'].drop_duplicates().values.tolist()
+
+        html = [
+            ('<div class="mdl-layout mdl-js-layout mdl-layout--fixed-header '
+             'mdl-layout--fixed-tabs">'),
+            '<header class="mdl-layout__header">',
+            '<div class="mdl-layout__header-row">',
+            '<span class="mdl-layout-title">APM QA results ({})</span>'.format(
+                self._output_filepath),
+            '</div>',
+        ]
+
+        # Tab selectors.
+        html.append('<div class="mdl-layout__tab-bar mdl-js-ripple-effect">')
+        for tab_index, score_name in enumerate(score_names):
+            is_active = tab_index == 0
+            html.append('<a href="#score-tab-{}" class="mdl-layout__tab{}">'
+                        '<span>{}</span></a>'.format(
+                            tab_index, ' is-active' if is_active else '',
+                            self._FormatName(score_name)))
+        html.append('</div>')
+
+        html.append('</header>')
+        html.append(
+            '<main class="mdl-layout__content" style="overflow-x: hidden;">')
+
+        # Tabs content.
+        for tab_index, score_name in enumerate(score_names):
+            is_active = tab_index == 0
+            html.append('<section class="mdl-layout__tab-panel{}" '
+                        'id="score-tab-{}">'.format(
+                            ' is-active' if is_active else '', tab_index))
+            html.append('<div class="page-content">')
+            html.append(
+                self._BuildScoreTab(score_name, ('s{}'.format(tab_index), )))
+            html.append('</div>')
+            html.append('</section>')
+
+        html.append('</main>')
+        html.append('</div>')
+
+        # Add snackbar for notifications.
+        html.append(
+            '<div id="snackbar" aria-live="assertive" aria-atomic="true" '
+            'aria-relevant="text" class="mdl-js-snackbar mdl-snackbar">'
+            '<div class="mdl-snackbar__text"></div>'
+            '<button type="button" class="mdl-snackbar__action"></button>'
+            '</div>')
+
+        return self._NEW_LINE.join(html)
+
+    def _BuildScoreTab(self, score_name, anchor_data):
+        """Builds the content of a tab."""
+        # Find unique values.
+        scores = self._scores_data_frame[
+            self._scores_data_frame.eval_score_name == score_name]
+        apm_configs = sorted(self._FindUniqueTuples(scores, ['apm_config']))
+        test_data_gen_configs = sorted(
+            self._FindUniqueTuples(scores,
+                                   ['test_data_gen', 'test_data_gen_params']))
+
+        html = [
+            '<div class="mdl-grid">',
+            '<div class="mdl-layout-spacer"></div>',
+            '<div class="mdl-cell mdl-cell--10-col">',
+            ('<table class="mdl-data-table mdl-js-data-table mdl-shadow--2dp" '
+             'style="width: 100%;">'),
+        ]
+
+        # Header.
+        html.append('<thead><tr><th>APM config / Test data generator</th>')
+        for test_data_gen_info in test_data_gen_configs:
+            html.append('<th>{} {}</th>'.format(
+                self._FormatName(test_data_gen_info[0]),
+                test_data_gen_info[1]))
+        html.append('</tr></thead>')
+
+        # Body.
+        html.append('<tbody>')
+        for apm_config in apm_configs:
+            html.append('<tr><td>' + self._FormatName(apm_config[0]) +
+                        '</td>')
+            for test_data_gen_info in test_data_gen_configs:
+                dialog_id = self._ScoreStatsInspectorDialogId(
+                    score_name, apm_config[0], test_data_gen_info[0],
+                    test_data_gen_info[1])
+                html.append(
+                    '<td onclick="openScoreStatsInspector(\'{}\')">{}</td>'.
+                    format(
+                        dialog_id,
+                        self._BuildScoreTableCell(score_name,
+                                                  test_data_gen_info[0],
+                                                  test_data_gen_info[1],
+                                                  apm_config[0])))
+            html.append('</tr>')
+        html.append('</tbody>')
+
+        html.append(
+            '</table></div><div class="mdl-layout-spacer"></div></div>')
+
+        html.append(
+            self._BuildScoreStatsInspectorDialogs(score_name, apm_configs,
+                                                  test_data_gen_configs,
+                                                  anchor_data))
+
+        return self._NEW_LINE.join(html)
+
+    def _BuildScoreTableCell(self, score_name, test_data_gen,
+                             test_data_gen_params, apm_config):
+        """Builds the content of a table cell for a score table."""
+        scores = self._SliceDataForScoreTableCell(score_name, apm_config,
+                                                  test_data_gen,
+                                                  test_data_gen_params)
+        stats = self._ComputeScoreStats(scores)
+
+        html = []
+        items_id_prefix = (score_name + test_data_gen + test_data_gen_params +
+                           apm_config)
+        if stats['count'] == 1:
+            # Show the only available score.
+            item_id = hashlib.md5(items_id_prefix.encode('utf-8')).hexdigest()
+            html.append('<div id="single-score-{0}">{1:f}</div>'.format(
+                item_id, scores['score'].mean()))
+            html.append(
+                '<div class="mdl-tooltip" data-mdl-for="single-score-{}">{}'
+                '</div>'.format(item_id, 'single value'))
+        else:
+            # Show stats.
+            for stat_name in ['min', 'max', 'mean', 'std dev']:
+                item_id = hashlib.md5(
+                    (items_id_prefix + stat_name).encode('utf-8')).hexdigest()
+                html.append('<div id="stats-{0}">{1:f}</div>'.format(
+                    item_id, stats[stat_name]))
+                html.append(
+                    '<div class="mdl-tooltip" data-mdl-for="stats-{}">{}'
+                    '</div>'.format(item_id, stat_name))
+
+        return self._NEW_LINE.join(html)
+
+    def _BuildScoreStatsInspectorDialogs(self, score_name, apm_configs,
+                                         test_data_gen_configs, anchor_data):
+        """Builds a set of score stats inspector dialogs."""
+        html = []
+        for apm_config in apm_configs:
+            for test_data_gen_info in test_data_gen_configs:
+                dialog_id = self._ScoreStatsInspectorDialogId(
+                    score_name, apm_config[0], test_data_gen_info[0],
+                    test_data_gen_info[1])
+
+                html.append(
+                    '<dialog class="mdl-dialog" id="{}">'.format(dialog_id))
+
+                # Content.
+                html.append('<div class="mdl-dialog__content">')
+                html.append(
+                    '<h6><strong>APM config preset</strong>: {}<br/>'
+                    '<strong>Test data generator</strong>: {} ({})</h6>'.
+                    format(self._FormatName(apm_config[0]),
+                           self._FormatName(test_data_gen_info[0]),
+                           test_data_gen_info[1]))
+                html.append(
+                    self._BuildScoreStatsInspectorDialog(
+                        score_name, apm_config[0], test_data_gen_info[0],
+                        test_data_gen_info[1], anchor_data + (dialog_id, )))
+                html.append('</div>')
+
+                # Actions.
+                html.append('<div class="mdl-dialog__actions">')
+                html.append('<button type="button" class="mdl-button" '
+                            'onclick="closeScoreStatsInspector()">'
+                            'Close</button>')
+                html.append('</div>')
+
+                html.append('</dialog>')
+
+        return self._NEW_LINE.join(html)
+
+    def _BuildScoreStatsInspectorDialog(self, score_name, apm_config,
+                                        test_data_gen, test_data_gen_params,
+                                        anchor_data):
+        """Builds one score stats inspector dialog."""
+        scores = self._SliceDataForScoreTableCell(score_name, apm_config,
+                                                  test_data_gen,
+                                                  test_data_gen_params)
+
+        capture_render_pairs = sorted(
+            self._FindUniqueTuples(scores, ['capture', 'render']))
+        echo_simulators = sorted(
+            self._FindUniqueTuples(scores, ['echo_simulator']))
+
+        html = [
+            '<table class="mdl-data-table mdl-js-data-table" '
+            'style="width: 100%;">'
+        ]
+
+        # Header.
+        html.append('<thead><tr><th>Capture-Render / Echo simulator</th>')
+        for echo_simulator in echo_simulators:
+            html.append('<th>' + self._FormatName(echo_simulator[0]) +
+                        '</th>')
+        html.append('</tr></thead>')
+
+        # Body.
+        html.append('<tbody>')
+        for row, (capture, render) in enumerate(capture_render_pairs):
+            html.append('<tr><td><div>{}</div><div>{}</div></td>'.format(
+                capture, render))
+            for col, echo_simulator in enumerate(echo_simulators):
+                score_tuple = self._SliceDataForScoreStatsTableCell(
+                    scores, capture, render, echo_simulator[0])
+                cell_class = 'r{}c{}'.format(row, col)
+                html.append('<td class="single-score-cell {}">{}</td>'.format(
+                    cell_class,
+                    self._BuildScoreStatsInspectorTableCell(
+                        score_tuple, anchor_data + (cell_class, ))))
+            html.append('</tr>')
+        html.append('</tbody>')
+
+        html.append('</table>')
+
+        # Placeholder for the audio inspector.
+        html.append('<div class="audio-inspector-placeholder"></div>')
+
+        return self._NEW_LINE.join(html)
+
+    def _BuildScoreStatsInspectorTableCell(self, score_tuple, anchor_data):
+        """Builds the content of a cell of a score stats inspector."""
+        anchor = '&'.join(anchor_data)
+        html = [('<div>{}</div>' +
+                 '<button class="mdl-button mdl-js-button mdl-button--icon" '
+                 'data-anchor="{}"></button>').format(score_tuple.score,
+                                                      anchor)]
+
+        # Add all the available file paths as hidden data.
+        for field_name in score_tuple.keys():
+            if field_name.endswith('_filepath'):
+                html.append(
+                    '<input type="hidden" name="{}" value="{}">'.format(
+                        field_name, score_tuple[field_name]))
+
+        return self._NEW_LINE.join(html)
+
+    def _SliceDataForScoreTableCell(self, score_name, apm_config,
+                                    test_data_gen, test_data_gen_params):
+        """Slices `self._scores_data_frame` to extract the data for a tab."""
+        masks = []
+        masks.append(self._scores_data_frame.eval_score_name == score_name)
+        masks.append(self._scores_data_frame.apm_config == apm_config)
+        masks.append(self._scores_data_frame.test_data_gen == test_data_gen)
+        masks.append(self._scores_data_frame.test_data_gen_params ==
+                     test_data_gen_params)
+        mask = functools.reduce((lambda i1, i2: i1 & i2), masks)
+        del masks
+        return self._scores_data_frame[mask]
+
+    @classmethod
+    def _SliceDataForScoreStatsTableCell(cls, scores, capture, render,
+                                         echo_simulator):
+        """Slices `scores` to extract the data for a tab."""
+        masks = []
+
+        masks.append(scores.capture == capture)
+        masks.append(scores.render == render)
+        masks.append(scores.echo_simulator == echo_simulator)
+        mask = functools.reduce((lambda i1, i2: i1 & i2), masks)
+        del masks
+
+        sliced_data = scores[mask]
+        assert len(sliced_data) == 1, 'single score is expected'
+        return sliced_data.iloc[0]
+
+    @classmethod
+    def _FindUniqueTuples(cls, data_frame, fields):
+        """Slices `data_frame` to a list of fields and finds unique tuples."""
+        return data_frame[fields].drop_duplicates().values.tolist()
+
+    @classmethod
+    def _ComputeScoreStats(cls, data_frame):
+        """Computes score stats."""
+        scores = data_frame['score']
+        return {
+            'count': scores.count(),
+            'min': scores.min(),
+            'max': scores.max(),
+            'mean': scores.mean(),
+            'std dev': scores.std(),
+        }
+
+    @classmethod
+    def _ScoreStatsInspectorDialogId(cls, score_name, apm_config,
+                                     test_data_gen, test_data_gen_params):
+        """Assigns a unique name to a dialog."""
+        return 'score-stats-dialog-' + hashlib.md5(
+            'score-stats-inspector-{}-{}-{}-{}'.format(
+                score_name, apm_config, test_data_gen,
+                test_data_gen_params).encode('utf-8')).hexdigest()
+
+    @classmethod
+    def _Save(cls, output_filepath, html):
+        """Writes the HTML file.
+
+        Args:
+          output_filepath: output file path.
+          html: string with the HTML content.
+        """
+        with open(output_filepath, 'w') as f:
+            f.write(html)
+
+    @classmethod
+    def _FormatName(cls, name):
+        """Formats a name.
+
+        Args:
+          name: a string.
+
+        Returns:
+          A copy of name in which underscores and dashes are replaced with a
+          space.
+        """
+        return re.sub(r'[_\-]', ' ', name)
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py
new file mode 100644
index 0000000000..412aa7c4e7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Unit tests for the export module.
+""" + +import logging +import os +import shutil +import tempfile +import unittest + +import pyquery as pq + +from . import audioproc_wrapper +from . import collect_data +from . import eval_scores_factory +from . import evaluation +from . import export +from . import simulation +from . import test_data_generation_factory + + +class TestExport(unittest.TestCase): + """Unit tests for the export module. + """ + + _CLEAN_TMP_OUTPUT = True + + def setUp(self): + """Creates temporary data to export.""" + self._tmp_path = tempfile.mkdtemp() + + # Run a fake experiment to produce data to export. + simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)), + evaluation_score_factory=( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(os.path.abspath(__file__)), + 'fake_polqa'), + echo_metric_tool_bin_path=None)), + ap_wrapper=audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper. + DEFAULT_APM_SIMULATOR_BIN_PATH), + evaluator=evaluation.ApmModuleEvaluator()) + simulator.Run( + config_filepaths=['apm_configs/default.json'], + capture_input_filepaths=[ + os.path.join(self._tmp_path, 'pure_tone-440_1000.wav'), + os.path.join(self._tmp_path, 'pure_tone-880_1000.wav'), + ], + test_data_generator_names=['identity', 'white_noise'], + eval_score_names=['audio_level_peak', 'audio_level_mean'], + output_dir=self._tmp_path) + + # Export results. + p = collect_data.InstanceArgumentsParser() + args = p.parse_args(['--output_dir', self._tmp_path]) + src_path = collect_data.ConstructSrcPath(args) + self._data_to_export = collect_data.FindScores(src_path, args) + + def tearDown(self): + """Recursively deletes temporary folders.""" + if self._CLEAN_TMP_OUTPUT: + shutil.rmtree(self._tmp_path) + else: + logging.warning(self.id() + ' did not clean the temporary path ' + + (self._tmp_path)) + + def testCreateHtmlReport(self): + fn_out = os.path.join(self._tmp_path, 'results.html') + exporter = export.HtmlExport(fn_out) + exporter.Export(self._data_to_export) + + document = pq.PyQuery(filename=fn_out) + self.assertIsInstance(document, pq.PyQuery) + # TODO(alessiob): Use PyQuery API to check the HTML file. diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py new file mode 100644 index 0000000000..a7db7b4840 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py @@ -0,0 +1,75 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +from __future__ import division + +import logging +import os +import subprocess +import shutil +import sys +import tempfile + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +from . 
+            subprocess.call([
+                self._path_to_binary, '-i', wav_file_path, '-o',
+                output_file_path
+            ])
+            self._vad_output = np.fromfile(output_file_path, np.float32)
+        except Exception as e:
+            logging.error('Error while running the ' + self.name + ' VAD (' +
+                          str(e) + ')')
+        finally:
+            if os.path.exists(tmp_path):
+                shutil.rmtree(tmp_path)
+
+    def GetVadOutput(self):
+        assert self._vad_output is not None
+        return self._vad_output
+
+    @classmethod
+    def ConstructVadDict(cls, vad_paths, vad_names):
+        external_vads = {}
+        for path, name in zip(vad_paths, vad_names):
+            external_vads[name] = ExternalVad(path, name)
+        return external_vads
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py
new file mode 100755
index 0000000000..f679f8c94a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+import argparse
+import numpy as np
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', required=True)
+    parser.add_argument('-o', required=True)
+
+    args = parser.parse_args()
+
+    array = np.arange(100, dtype=np.float32)
+    array.tofile(open(args.o, 'wb'))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc
new file mode 100644
index 0000000000..6f3b2d1dd7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+const char* const kErrorMessage = "-Out /path/to/output/file is mandatory";
+
+// Writes fake output intended to be parsed by
+// quality_assessment.eval_scores.PolqaScore.
+void WriteOutputFile(absl::string_view output_file_path) {
+  RTC_CHECK_NE(output_file_path, "");
+  std::ofstream out(std::string{output_file_path});
+  RTC_CHECK(!out.bad());
+  out << "* Fake Polqa output" << std::endl;
+  out << "FakeField1\tPolqaScore\tFakeField2" << std::endl;
+  out << "FakeValue1\t3.25\tFakeValue2" << std::endl;
+  out.close();
+}
+
+}  // namespace
+
+int main(int argc, char* argv[]) {
+  // Find "-Out" and use its next argument as output file path.
+  RTC_CHECK_GE(argc, 3) << kErrorMessage;
+  const std::string kSoughtFlagName = "-Out";
+  for (int i = 1; i < argc - 1; ++i) {
+    if (kSoughtFlagName.compare(argv[i]) == 0) {
+      WriteOutputFile(argv[i + 1]);
+      return 0;
+    }
+  }
+  RTC_FATAL() << kErrorMessage;
+}
+
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py
new file mode 100644
index 0000000000..af022bd461
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Input mixer module.
+"""
+
+import logging
+import os
+
+from . import exceptions
+from . import signal_processing
+
+
+class ApmInputMixer(object):
+    """Class to mix a set of audio segments down to the APM input."""
+
+    _HARD_CLIPPING_LOG_MSG = 'hard clipping detected in the mixed signal'
+
+    def __init__(self):
+        pass
+
+    @classmethod
+    def HardClippingLogMessage(cls):
+        """Returns the log message used when hard clipping is detected in the
+        mix.
+
+        This method is mainly intended to be used by the unit tests.
+        """
+        return cls._HARD_CLIPPING_LOG_MSG
+
+    @classmethod
+    def Mix(cls, output_path, capture_input_filepath, echo_filepath):
+        """Mixes capture and echo.
+
+        Creates the overall capture input for APM by mixing the "echo-free"
+        capture signal with the echo signal (e.g., echo simulated via the
+        echo_path_simulation module).
+
+        The echo signal cannot be shorter than the capture signal and the
+        generated mix will have the same duration of the capture signal. The
+        latter property is enforced in order to let the input of APM and the
+        reference signal created by TestDataGenerator have the same length
+        (required for the evaluation step).
+
+        Hard-clipping may occur in the mix; a warning is raised when this
+        happens.
+
+        If `echo_filepath` is None, nothing is done and
+        `capture_input_filepath` is returned.
+
+        Args:
+          output_path: path to the directory where the mix is saved.
+          capture_input_filepath: path to the echo-free capture audio track
+            file.
+          echo_filepath: path to the echo audio track file or None.
+
+        Returns:
+          Path to the mix audio track file.
+        """
+        if echo_filepath is None:
+            return capture_input_filepath
+
+        # Build the mix output file name as a function of the echo file name.
+        # This ensures that if the internal parameters of the echo path
+        # simulator change, no erroneous cache hit occurs.
+        echo_file_name, _ = os.path.splitext(os.path.split(echo_filepath)[1])
+        capture_input_file_name, _ = os.path.splitext(
+            os.path.split(capture_input_filepath)[1])
+        mix_filepath = os.path.join(
+            output_path,
+            'mix_capture_{}_{}.wav'.format(capture_input_file_name,
+                                           echo_file_name))
+
+        # Create the mix if not done yet.
+        mix = None
+        if not os.path.exists(mix_filepath):
+            echo_free_capture = signal_processing.SignalProcessingUtils.LoadWav(
+                capture_input_filepath)
+            echo = signal_processing.SignalProcessingUtils.LoadWav(
+                echo_filepath)
+
+            if signal_processing.SignalProcessingUtils.CountSamples(echo) < (
+                    signal_processing.SignalProcessingUtils.CountSamples(
+                        echo_free_capture)):
+                raise exceptions.InputMixerException(
+                    'echo cannot be shorter than capture')
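+            # pydub's overlay() mixes sample-wise and truncates the result to
+            # the duration of the calling segment (the echo-free capture),
+            # which enforces the fixed-duration contract described in the
+            # docstring above.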
+            mix = echo_free_capture.overlay(echo)
+            signal_processing.SignalProcessingUtils.SaveWav(mix_filepath, mix)
+
+        # Check if hard clipping occurs.
+        if mix is None:
+            mix = signal_processing.SignalProcessingUtils.LoadWav(
+                mix_filepath)
+        if signal_processing.SignalProcessingUtils.DetectHardClipping(mix):
+            logging.warning(cls._HARD_CLIPPING_LOG_MSG)
+
+        return mix_filepath
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py
new file mode 100644
index 0000000000..4fd5e4f1ee
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py
@@ -0,0 +1,140 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Unit tests for the input mixer module.
+"""
+
+import logging
+import os
+import shutil
+import tempfile
+import unittest
+
+import mock
+
+from . import exceptions
+from . import input_mixer
+from . import signal_processing
+
+
+class TestApmInputMixer(unittest.TestCase):
+    """Unit tests for the ApmInputMixer class.
+    """
+
+    # Audio track file names created in setUp().
+    _FILENAMES = ['capture', 'echo_1', 'echo_2', 'shorter', 'longer']
+
+    # Target peak power level (dBFS) of each audio track file created in
+    # setUp(). These values are hand-crafted in order to make saturation
+    # happen when capture and echo_2 are mixed and the contrary for capture
+    # and echo_1. None means that the power is not changed.
+    _MAX_PEAK_POWER_LEVELS = [-10.0, -5.0, 0.0, None, None]
+
+    # Audio track file durations in milliseconds.
+    _DURATIONS = [1000, 1000, 1000, 800, 1200]
+
+    _SAMPLE_RATE = 48000
+
+    def setUp(self):
+        """Creates temporary data."""
+        self._tmp_path = tempfile.mkdtemp()
+
+        # Create audio track files.
+        self._audio_tracks = {}
+        for filename, peak_power, duration in zip(self._FILENAMES,
+                                                  self._MAX_PEAK_POWER_LEVELS,
+                                                  self._DURATIONS):
+            audio_track_filepath = os.path.join(self._tmp_path,
+                                                '{}.wav'.format(filename))
+
+            # Create a pure tone with the target peak power level.
+            template = signal_processing.SignalProcessingUtils.GenerateSilence(
+                duration=duration, sample_rate=self._SAMPLE_RATE)
+            signal = signal_processing.SignalProcessingUtils.GeneratePureTone(
+                template)
+            if peak_power is not None:
+                signal = signal.apply_gain(-signal.max_dBFS + peak_power)
+
+            signal_processing.SignalProcessingUtils.SaveWav(
+                audio_track_filepath, signal)
+            self._audio_tracks[filename] = {
+                'filepath':
+                audio_track_filepath,
+                'num_samples':
+                signal_processing.SignalProcessingUtils.CountSamples(signal)
+            }
+
+    def tearDown(self):
+        """Recursively deletes temporary folders."""
+        shutil.rmtree(self._tmp_path)
+
+    def testCheckMixSameDuration(self):
+        """Checks the duration when mixing capture and echo with same
+        duration."""
+        mix_filepath = input_mixer.ApmInputMixer.Mix(
+            self._tmp_path, self._audio_tracks['capture']['filepath'],
+            self._audio_tracks['echo_1']['filepath'])
+        self.assertTrue(os.path.exists(mix_filepath))
+
+        mix = signal_processing.SignalProcessingUtils.LoadWav(mix_filepath)
+        self.assertEqual(
+            self._audio_tracks['capture']['num_samples'],
+            signal_processing.SignalProcessingUtils.CountSamples(mix))
+
+    def testRejectShorterEcho(self):
+        """Rejects echo signals that are shorter than the capture signal."""
+        try:
+            _ = input_mixer.ApmInputMixer.Mix(
+                self._tmp_path, self._audio_tracks['capture']['filepath'],
+                self._audio_tracks['shorter']['filepath'])
+            self.fail('no exception raised')
+        except exceptions.InputMixerException:
+            pass
+
+    def testCheckMixDurationWithLongerEcho(self):
+        """Checks the duration when mixing an echo longer than the capture."""
+        mix_filepath = input_mixer.ApmInputMixer.Mix(
+            self._tmp_path, self._audio_tracks['capture']['filepath'],
+            self._audio_tracks['longer']['filepath'])
+        self.assertTrue(os.path.exists(mix_filepath))
+
+        mix = signal_processing.SignalProcessingUtils.LoadWav(mix_filepath)
+        self.assertEqual(
+            self._audio_tracks['capture']['num_samples'],
+            signal_processing.SignalProcessingUtils.CountSamples(mix))
+
+    def testCheckOutputFileNamesConflict(self):
+        """Checks that different echo files lead to different output file
+        names."""
+        mix1_filepath = input_mixer.ApmInputMixer.Mix(
+            self._tmp_path, self._audio_tracks['capture']['filepath'],
+            self._audio_tracks['echo_1']['filepath'])
+        self.assertTrue(os.path.exists(mix1_filepath))
+
+        mix2_filepath = input_mixer.ApmInputMixer.Mix(
+            self._tmp_path, self._audio_tracks['capture']['filepath'],
+            self._audio_tracks['echo_2']['filepath'])
+        self.assertTrue(os.path.exists(mix2_filepath))
+
+        self.assertNotEqual(mix1_filepath, mix2_filepath)
+
+    def testHardClippingLogExpected(self):
+        """Checks that hard clipping warning is raised when occurring."""
+        logging.warning = mock.MagicMock(name='warning')
+        _ = input_mixer.ApmInputMixer.Mix(
+            self._tmp_path, self._audio_tracks['capture']['filepath'],
+            self._audio_tracks['echo_2']['filepath'])
+        logging.warning.assert_called_once_with(
+            input_mixer.ApmInputMixer.HardClippingLogMessage())
+
+    def testHardClippingLogNotExpected(self):
+        """Checks that hard clipping warning is not raised when not
+        occurring."""
+        logging.warning = mock.MagicMock(name='warning')
+        _ = input_mixer.ApmInputMixer.Mix(
+            self._tmp_path, self._audio_tracks['capture']['filepath'],
+            self._audio_tracks['echo_1']['filepath'])
+        self.assertNotIn(
+            mock.call(input_mixer.ApmInputMixer.HardClippingLogMessage()),
+            logging.warning.call_args_list)
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py
new file mode 100644
index 0000000000..b64fdcca89
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Input signal creator module.
+"""
+
+from . import exceptions
+from . import signal_processing
+
+
+class InputSignalCreator(object):
+    """Input signal creator class.
+    """
+
+    @classmethod
+    def Create(cls, name, raw_params):
+        """Creates an input signal and its metadata.
+
+        Args:
+          name: Input signal creator name.
+          raw_params: Tuple of parameters to pass to the specific signal
+            creator.
+
+        Returns:
+          (AudioSegment, dict) tuple.
+        """
+        try:
+            signal = {}
+            params = {}
+
+            if name == 'pure_tone':
+                params['frequency'] = float(raw_params[0])
+                params['duration'] = int(raw_params[1])
+                signal = cls._CreatePureTone(params['frequency'],
+                                             params['duration'])
+            else:
+                raise exceptions.InputSignalCreatorException(
+                    'Invalid input signal creator name')
+
+            # Complete metadata.
+            params['signal'] = name
+
+            return signal, params
+        except (TypeError, AssertionError) as e:
+            raise exceptions.InputSignalCreatorException(
+                'Invalid signal creator parameters: {}'.format(e))
+
+    @classmethod
+    def _CreatePureTone(cls, frequency, duration):
+        """
+        Generates a pure tone at 48000 Hz.
+
+        Args:
+          frequency: Float in (0-24000] (Hz).
+          duration: Integer (milliseconds).
+
+        Returns:
+          AudioSegment instance.
+        """
+        assert 0 < frequency <= 24000
+        assert duration > 0
+        template = signal_processing.SignalProcessingUtils.GenerateSilence(
+            duration)
+        return signal_processing.SignalProcessingUtils.GeneratePureTone(
+            template, frequency)
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css
new file mode 100644
index 0000000000..2f406bb002
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css
@@ -0,0 +1,32 @@
+/* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +td.selected-score { + background-color: #DDD; +} + +td.single-score-cell{ + text-align: center; +} + +.audio-inspector { + text-align: center; +} + +.audio-inspector div{ + margin-bottom: 0; + padding-bottom: 0; + padding-top: 0; +} + +.audio-inspector div div{ + margin-bottom: 0; + padding-bottom: 0; + padding-top: 0; +} diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js new file mode 100644 index 0000000000..8e47411058 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js @@ -0,0 +1,376 @@ +// Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +/** + * Opens the score stats inspector dialog. + * @param {String} dialogId: identifier of the dialog to show. + * @return {DOMElement} The dialog element that has been opened. + */ +function openScoreStatsInspector(dialogId) { + var dialog = document.getElementById(dialogId); + dialog.showModal(); + return dialog; +} + +/** + * Closes the score stats inspector dialog. + */ +function closeScoreStatsInspector() { + var dialog = document.querySelector('dialog[open]'); + if (dialog == null) + return; + dialog.close(); +} + +/** + * Audio inspector class. + * @constructor + */ +function AudioInspector() { + console.debug('Creating an AudioInspector instance.'); + this.audioPlayer_ = new Audio(); + this.metadata_ = {}; + this.currentScore_ = null; + this.audioInspector_ = null; + this.snackbarContainer_ = document.querySelector('#snackbar'); + + // Get base URL without anchors. + this.baseUrl_ = window.location.href; + var index = this.baseUrl_.indexOf('#'); + if (index > 0) + this.baseUrl_ = this.baseUrl_.substr(0, index) + console.info('Base URL set to "' + window.location.href + '".'); + + window.event.stopPropagation(); + this.createTextAreasForCopy_(); + this.createAudioInspector_(); + this.initializeEventHandlers_(); + + // When MDL is ready, parse the anchor (if any) to show the requested + // experiment. + var self = this; + document.querySelectorAll('header a')[0].addEventListener( + 'mdl-componentupgraded', function() { + if (!self.parseWindowAnchor()) { + // If not experiment is requested, open the first section. + console.info('No anchor parsing, opening the first section.'); + document.querySelectorAll('header a > span')[0].click(); + } + }); +} + +/** + * Parse the anchor in the window URL. + * @return {bool} True if the parsing succeeded. + */ +AudioInspector.prototype.parseWindowAnchor = function() { + var index = location.href.indexOf('#'); + if (index == -1) { + console.debug('No # found in the URL.'); + return false; + } + + var anchor = location.href.substr(index - location.href.length + 1); + console.info('Anchor changed: "' + anchor + '".'); + + var parts = anchor.split('&'); + if (parts.length != 3) { + console.info('Ignoring anchor with invalid number of fields.'); + return false; + } + + var openDialog = document.querySelector('dialog[open]'); + try { + // Open the requested dialog if not already open. 
+    if (!openDialog || openDialog.id != parts[1]) {
+      !openDialog || openDialog.close();
+      document.querySelectorAll('header a > span')[
+          parseInt(parts[0].substr(1))].click();
+      openDialog = openScoreStatsInspector(parts[1]);
+    }
+
+    // Trigger click on cell.
+    var cell = openDialog.querySelector('td.' + parts[2]);
+    cell.focus();
+    cell.click();
+
+    this.showNotification_('Experiment selected.');
+    return true;
+  } catch (e) {
+    this.showNotification_('Cannot select experiment :(');
+    console.error('Exception caught while selecting experiment: "' + e +
+        '".');
+  }
+
+  return false;
+}
+
+/**
+ * Set up the inspector for a new score.
+ * @param {DOMElement} element: Element linked to the selected score.
+ */
+AudioInspector.prototype.selectedScoreChange = function(element) {
+  if (this.currentScore_ == element) { return; }
+  if (this.currentScore_ != null) {
+    this.currentScore_.classList.remove('selected-score');
+  }
+  this.currentScore_ = element;
+  this.currentScore_.classList.add('selected-score');
+  this.stopAudio();
+
+  // Read metadata.
+  var matches = element.querySelectorAll('input[type=hidden]');
+  this.metadata_ = {};
+  for (var index = 0; index < matches.length; ++index) {
+    this.metadata_[matches[index].name] = matches[index].value;
+  }
+
+  // Show the audio inspector interface.
+  var container = element.parentNode.parentNode.parentNode.parentNode;
+  var audioInspectorPlaceholder = container.querySelector(
+      '.audio-inspector-placeholder');
+  this.moveInspector_(audioInspectorPlaceholder);
+};
+
+/**
+ * Stop playing audio.
+ */
+AudioInspector.prototype.stopAudio = function() {
+  console.info('Pausing audio play out.');
+  this.audioPlayer_.pause();
+};
+
+/**
+ * Show a text message using the snackbar.
+ */
+AudioInspector.prototype.showNotification_ = function(text) {
+  try {
+    this.snackbarContainer_.MaterialSnackbar.showSnackbar({
+        message: text, timeout: 2000});
+  } catch (e) {
+    // Fallback to an alert.
+    alert(text);
+    console.warn('Cannot use snackbar: "' + e + '"');
+  }
+}
+
+/**
+ * Move the audio inspector DOM node into the given parent.
+ * @param {DOMElement} newParentNode: New parent for the inspector.
+ */
+AudioInspector.prototype.moveInspector_ = function(newParentNode) {
+  newParentNode.appendChild(this.audioInspector_);
+};
+
+/**
+ * Play audio file from url.
+ * @param {string} metadataFieldName: Metadata field name.
+ */
+AudioInspector.prototype.playAudio = function(metadataFieldName) {
+  if (this.metadata_[metadataFieldName] == undefined) { return; }
+  if (this.metadata_[metadataFieldName] == 'None') {
+    alert('The selected stream was not used during the experiment.');
+    return;
+  }
+  this.stopAudio();
+  this.audioPlayer_.src = this.metadata_[metadataFieldName];
+  console.debug('Audio source URL: "' + this.audioPlayer_.src + '"');
+  this.audioPlayer_.play();
+  console.info('Playing out audio.');
+};
+
+/**
+ * Create hidden text areas to copy URLs.
+ *
+ * For each dialog, one text area is created since it is not possible to
+ * select text on a text area outside of the active dialog.
+ */
+AudioInspector.prototype.createTextAreasForCopy_ = function() {
+  var self = this;
+  document.querySelectorAll('dialog.mdl-dialog').forEach(function(element) {
+    var textArea = document.createElement("textarea");
+    textArea.classList.add('url-copy');
+    textArea.style.position = 'fixed';
+    textArea.style.bottom = 0;
+    textArea.style.left = 0;
+    textArea.style.width = '2em';
+    textArea.style.height = '2em';
+    textArea.style.border = 'none';
+    textArea.style.outline = 'none';
+    textArea.style.boxShadow = 'none';
+    textArea.style.background = 'transparent';
+    textArea.style.fontSize = '6px';
+    element.appendChild(textArea);
+  });
+}
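+
+// The audio inspector buttons below reference the hidden metadata fields
+// saved by export.py (echo_filepath, render_filepath, capture_filepath,
+// apm_output_filepath, echo_free_capture_filepath,
+// clean_capture_input_filepath, apm_reference_filepath); playAudio() looks
+// the stream to play out up by one of these names.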
+ */
+AudioInspector.prototype.createTextAreasForCopy_ = function() {
+  var self = this;
+  document.querySelectorAll('dialog.mdl-dialog').forEach(function(element) {
+    var textArea = document.createElement("textarea");
+    textArea.classList.add('url-copy');
+    textArea.style.position = 'fixed';
+    textArea.style.bottom = 0;
+    textArea.style.left = 0;
+    textArea.style.width = '2em';
+    textArea.style.height = '2em';
+    textArea.style.border = 'none';
+    textArea.style.outline = 'none';
+    textArea.style.boxShadow = 'none';
+    textArea.style.background = 'transparent';
+    textArea.style.fontSize = '6px';
+    element.appendChild(textArea);
+  });
+};
+
+/**
+ * Create audio inspector.
+ */
+AudioInspector.prototype.createAudioInspector_ = function() {
+  var buttonIndex = 0;
+  function getButtonHtml(icon, toolTipText, caption, metadataFieldName) {
+    var buttonId = 'audioInspectorButton' + buttonIndex++;
+    // An icon button plus its tooltip; the hidden input carries the metadata
+    // field name that the click handlers read back.
+    var html = caption == null ? '' : caption;
+    html += '<button class="mdl-button mdl-js-button mdl-button--icon" id="' +
+                buttonId + '">' +
+              '<i class="material-icons">' + icon + '</i>' +
+              '<input type="hidden" value="' + metadataFieldName + '">' +
+            '</button>' +
+            '<div class="mdl-tooltip" data-mdl-for="' + buttonId + '">' +
+              toolTipText +
+            '</div>';
+    return html;
+  }
+
+  // TODO(alessiob): Add timeline and highlight current track by changing icon
+  // color.
+
+  this.audioInspector_ = document.createElement('div');
+  this.audioInspector_.classList.add('audio-inspector');
+  // One row of cells per signal group: echo/render playback, APM
+  // input/output, and the reference tracks.
+  this.audioInspector_.innerHTML =
+      '<div>' +
+        '<div>' +
+          getButtonHtml('play_arrow', 'Simulated echo', 'Ein',
+                        'echo_filepath') +
+        '</div>' +
+        '<div>' +
+          getButtonHtml('stop', 'Stop playing [S]', null, '__stop__') +
+        '</div>' +
+        '<div>' +
+          getButtonHtml('play_arrow', 'Render stream', 'Rin',
+                        'render_filepath') +
+        '</div>' +
+      '</div>' +
+      '<div>' +
+        '<div>' +
+          getButtonHtml('play_arrow', 'Capture stream (APM input) [1]',
+                        'Y\'in', 'capture_filepath') +
+        '</div>' +
+        '<div>APM</div>' +
+        '<div>' +
+          getButtonHtml('play_arrow', 'APM output [2]', 'Yout',
+                        'apm_output_filepath') +
+        '</div>' +
+      '</div>' +
+      '<div>' +
+        '<div>' +
+          getButtonHtml('play_arrow', 'Echo-free capture stream',
+                        'Yin', 'echo_free_capture_filepath') +
+        '</div>' +
+        '<div>' +
+          getButtonHtml('play_arrow', 'Clean capture stream',
+                        'Yclean', 'clean_capture_input_filepath') +
+        '</div>' +
+        '<div>' +
+          getButtonHtml('play_arrow', 'APM reference [3]', 'Yref',
+                        'apm_reference_filepath') +
+        '</div>' +
+      '</div>
'; + + // Add an invisible node as initial container for the audio inspector. + var parent = document.createElement('div'); + parent.style.display = 'none'; + this.moveInspector_(parent); + document.body.appendChild(parent); +}; + +/** + * Initialize event handlers. + */ +AudioInspector.prototype.initializeEventHandlers_ = function() { + var self = this; + + // Score cells. + document.querySelectorAll('td.single-score-cell').forEach(function(element) { + element.onclick = function() { + self.selectedScoreChange(this); + } + }); + + // Copy anchor URLs icons. + if (document.queryCommandSupported('copy')) { + document.querySelectorAll('td.single-score-cell button').forEach( + function(element) { + element.onclick = function() { + // Find the text area in the dialog. + var textArea = element.closest('dialog').querySelector( + 'textarea.url-copy'); + + // Copy. + textArea.value = self.baseUrl_ + '#' + element.getAttribute( + 'data-anchor'); + textArea.select(); + try { + if (!document.execCommand('copy')) + throw 'Copy returned false'; + self.showNotification_('Experiment URL copied.'); + } catch (e) { + self.showNotification_('Cannot copy experiment URL :('); + console.error(e); + } + } + }); + } else { + self.showNotification_( + 'The copy command is disabled. URL copy is not enabled.'); + } + + // Audio inspector buttons. + this.audioInspector_.querySelectorAll('button').forEach(function(element) { + var target = element.querySelector('input[type=hidden]'); + if (target == null) { return; } + element.onclick = function() { + if (target.value == '__stop__') { + self.stopAudio(); + } else { + self.playAudio(target.value); + } + }; + }); + + // Dialog close handlers. + var dialogs = document.querySelectorAll('dialog').forEach(function(element) { + element.onclose = function() { + self.stopAudio(); + } + }); + + // Keyboard shortcuts. + window.onkeyup = function(e) { + var key = e.keyCode ? e.keyCode : e.which; + switch (key) { + case 49: // 1. + self.playAudio('capture_filepath'); + break; + case 50: // 2. + self.playAudio('apm_output_filepath'); + break; + case 51: // 3. + self.playAudio('apm_reference_filepath'); + break; + case 83: // S. + case 115: // s. + self.stopAudio(); + break; + } + }; + + // Hash change. + window.onhashchange = function(e) { + self.parseWindowAnchor(); + } +}; diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py new file mode 100644 index 0000000000..95e801903d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py @@ -0,0 +1,359 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Signal processing utility module. 
+""" + +import array +import logging +import os +import sys +import enum + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +try: + import pydub + import pydub.generators +except ImportError: + logging.critical('Cannot import the third-party Python package pydub') + sys.exit(1) + +try: + import scipy.signal + import scipy.fftpack +except ImportError: + logging.critical('Cannot import the third-party Python package scipy') + sys.exit(1) + +from . import exceptions + + +class SignalProcessingUtils(object): + """Collection of signal processing utilities. + """ + + @enum.unique + class MixPadding(enum.Enum): + NO_PADDING = 0 + ZERO_PADDING = 1 + LOOP = 2 + + def __init__(self): + pass + + @classmethod + def LoadWav(cls, filepath, channels=1): + """Loads wav file. + + Args: + filepath: path to the wav audio track file to load. + channels: number of channels (downmixing to mono by default). + + Returns: + AudioSegment instance. + """ + if not os.path.exists(filepath): + logging.error('cannot find the <%s> audio track file', filepath) + raise exceptions.FileNotFoundError() + return pydub.AudioSegment.from_file(filepath, + format='wav', + channels=channels) + + @classmethod + def SaveWav(cls, output_filepath, signal): + """Saves wav file. + + Args: + output_filepath: path to the wav audio track file to save. + signal: AudioSegment instance. + """ + return signal.export(output_filepath, format='wav') + + @classmethod + def CountSamples(cls, signal): + """Number of samples per channel. + + Args: + signal: AudioSegment instance. + + Returns: + An integer. + """ + number_of_samples = len(signal.get_array_of_samples()) + assert signal.channels > 0 + assert number_of_samples % signal.channels == 0 + return number_of_samples / signal.channels + + @classmethod + def GenerateSilence(cls, duration=1000, sample_rate=48000): + """Generates silence. + + This method can also be used to create a template AudioSegment instance. + A template can then be used with other Generate*() methods accepting an + AudioSegment instance as argument. + + Args: + duration: duration in ms. + sample_rate: sample rate. + + Returns: + AudioSegment instance. + """ + return pydub.AudioSegment.silent(duration, sample_rate) + + @classmethod + def GeneratePureTone(cls, template, frequency=440.0): + """Generates a pure tone. + + The pure tone is generated with the same duration and in the same format of + the given template signal. + + Args: + template: AudioSegment instance. + frequency: Frequency of the pure tone in Hz. + + Return: + AudioSegment instance. + """ + if frequency > template.frame_rate >> 1: + raise exceptions.SignalProcessingException('Invalid frequency') + + generator = pydub.generators.Sine(sample_rate=template.frame_rate, + bit_depth=template.sample_width * 8, + freq=frequency) + + return generator.to_audio_segment(duration=len(template), volume=0.0) + + @classmethod + def GenerateWhiteNoise(cls, template): + """Generates white noise. + + The white noise is generated with the same duration and in the same format + of the given template signal. + + Args: + template: AudioSegment instance. + + Return: + AudioSegment instance. 
+ """ + generator = pydub.generators.WhiteNoise( + sample_rate=template.frame_rate, + bit_depth=template.sample_width * 8) + return generator.to_audio_segment(duration=len(template), volume=0.0) + + @classmethod + def AudioSegmentToRawData(cls, signal): + samples = signal.get_array_of_samples() + if samples.typecode != 'h': + raise exceptions.SignalProcessingException( + 'Unsupported samples type') + return np.array(signal.get_array_of_samples(), np.int16) + + @classmethod + def Fft(cls, signal, normalize=True): + if signal.channels != 1: + raise NotImplementedError('multiple-channel FFT not implemented') + x = cls.AudioSegmentToRawData(signal).astype(np.float32) + if normalize: + x /= max(abs(np.max(x)), 1.0) + y = scipy.fftpack.fft(x) + return y[:len(y) / 2] + + @classmethod + def DetectHardClipping(cls, signal, threshold=2): + """Detects hard clipping. + + Hard clipping is simply detected by counting samples that touch either the + lower or upper bound too many times in a row (according to `threshold`). + The presence of a single sequence of samples meeting such property is enough + to label the signal as hard clipped. + + Args: + signal: AudioSegment instance. + threshold: minimum number of samples at full-scale in a row. + + Returns: + True if hard clipping is detect, False otherwise. + """ + if signal.channels != 1: + raise NotImplementedError( + 'multiple-channel clipping not implemented') + if signal.sample_width != 2: # Note that signal.sample_width is in bytes. + raise exceptions.SignalProcessingException( + 'hard-clipping detection only supported for 16 bit samples') + samples = cls.AudioSegmentToRawData(signal) + + # Detect adjacent clipped samples. + samples_type_info = np.iinfo(samples.dtype) + mask_min = samples == samples_type_info.min + mask_max = samples == samples_type_info.max + + def HasLongSequence(vector, min_legth=threshold): + """Returns True if there are one or more long sequences of True flags.""" + seq_length = 0 + for b in vector: + seq_length = seq_length + 1 if b else 0 + if seq_length >= min_legth: + return True + return False + + return HasLongSequence(mask_min) or HasLongSequence(mask_max) + + @classmethod + def ApplyImpulseResponse(cls, signal, impulse_response): + """Applies an impulse response to a signal. + + Args: + signal: AudioSegment instance. + impulse_response: list or numpy vector of float values. + + Returns: + AudioSegment instance. + """ + # Get samples. + assert signal.channels == 1, ( + 'multiple-channel recordings not supported') + samples = signal.get_array_of_samples() + + # Convolve. + logging.info( + 'applying %d order impulse response to a signal lasting %d ms', + len(impulse_response), len(signal)) + convolved_samples = scipy.signal.fftconvolve(in1=samples, + in2=impulse_response, + mode='full').astype( + np.int16) + logging.info('convolution computed') + + # Cast. + convolved_samples = array.array(signal.array_type, convolved_samples) + + # Verify. + logging.debug('signal length: %d samples', len(samples)) + logging.debug('convolved signal length: %d samples', + len(convolved_samples)) + assert len(convolved_samples) > len(samples) + + # Generate convolved signal AudioSegment instance. 
+        convolved_signal = pydub.AudioSegment(data=convolved_samples,
+                                              metadata={
+                                                  'sample_width':
+                                                  signal.sample_width,
+                                                  'frame_rate':
+                                                  signal.frame_rate,
+                                                  'frame_width':
+                                                  signal.frame_width,
+                                                  'channels': signal.channels,
+                                              })
+        assert len(convolved_signal) > len(signal)
+
+        return convolved_signal
+
+    @classmethod
+    def Normalize(cls, signal):
+        """Normalizes a signal.
+
+        Args:
+            signal: AudioSegment instance.
+
+        Returns:
+            An AudioSegment instance.
+        """
+        return signal.apply_gain(-signal.max_dBFS)
+
+    @classmethod
+    def Copy(cls, signal):
+        """Makes a copy of a signal.
+
+        Args:
+            signal: AudioSegment instance.
+
+        Returns:
+            An AudioSegment instance.
+        """
+        return pydub.AudioSegment(data=signal.get_array_of_samples(),
+                                  metadata={
+                                      'sample_width': signal.sample_width,
+                                      'frame_rate': signal.frame_rate,
+                                      'frame_width': signal.frame_width,
+                                      'channels': signal.channels,
+                                  })
+
+    @classmethod
+    def MixSignals(cls,
+                   signal,
+                   noise,
+                   target_snr=0.0,
+                   pad_noise=MixPadding.NO_PADDING):
+        """Mixes `signal` and `noise` with a target SNR.
+
+        Mix `signal` and `noise` with a desired SNR by scaling `noise`.
+        If the target SNR is +/- infinite, a copy of signal/noise is returned.
+        If `signal` is shorter than `noise`, the length of the mix equals that of
+        `signal`. Otherwise, the mix length depends on whether padding is applied.
+        When padding is not applied, that is `pad_noise` is set to NO_PADDING
+        (default), the mix length equals that of `noise` - i.e., `signal` is
+        truncated. Otherwise, `noise` is extended and the resulting mix has the same
+        length as `signal`.
+
+        Args:
+            signal: AudioSegment instance (signal).
+            noise: AudioSegment instance (noise).
+            target_snr: float, numpy.Inf or -numpy.Inf (dB).
+            pad_noise: SignalProcessingUtils.MixPadding, default: NO_PADDING.
+
+        Returns:
+            An AudioSegment instance.
+        """
+        # Handle infinite target SNR.
+        if target_snr == -np.Inf:
+            # Return a copy of noise.
+            logging.warning('SNR = -Inf, returning noise')
+            return cls.Copy(noise)
+        elif target_snr == np.Inf:
+            # Return a copy of signal.
+            logging.warning('SNR = +Inf, returning signal')
+            return cls.Copy(signal)
+
+        # Check signal and noise power.
+        signal_power = float(signal.dBFS)
+        noise_power = float(noise.dBFS)
+        if signal_power == -np.Inf:
+            logging.error('signal has -Inf power, cannot mix')
+            raise exceptions.SignalProcessingException(
+                'cannot mix a signal with -Inf power')
+        if noise_power == -np.Inf:
+            logging.error('noise has -Inf power, cannot mix')
+            raise exceptions.SignalProcessingException(
+                'cannot mix noise with -Inf power')
+
+        # Mix.
+        gain_db = signal_power - noise_power - target_snr
+        signal_duration = len(signal)
+        noise_duration = len(noise)
+        if signal_duration <= noise_duration:
+            # Ignore `pad_noise`: `noise` is truncated if longer than `signal`;
+            # the mix has the same length as `signal`.
+            return signal.overlay(noise.apply_gain(gain_db))
+        elif pad_noise == cls.MixPadding.NO_PADDING:
+            # `signal` is longer than `noise`, but no padding is applied to `noise`.
+            # Truncate `signal`.
+            return noise.overlay(signal, gain_during_overlay=gain_db)
+        elif pad_noise == cls.MixPadding.ZERO_PADDING:
+            # TODO(alessiob): Check that this works as expected.
+            return signal.overlay(noise.apply_gain(gain_db))
+        elif pad_noise == cls.MixPadding.LOOP:
+            # `signal` is longer than `noise`, extend `noise` by looping.
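+            # With loop=True, pydub repeats the overlaid noise segment until
+            # the end of `signal` is reached, so no noise-free tail is left.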
+            return signal.overlay(noise.apply_gain(gain_db), loop=True)
+        else:
+            raise exceptions.SignalProcessingException('invalid padding type')
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
new file mode 100644
index 0000000000..881fb66800
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
@@ -0,0 +1,183 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Unit tests for the signal_processing module.
+"""
+
+import unittest
+
+import numpy as np
+import pydub
+
+from . import exceptions
+from . import signal_processing
+
+
+class TestSignalProcessing(unittest.TestCase):
+    """Unit tests for the signal_processing module.
+    """
+
+    def testMixSignals(self):
+        # Generate a template signal with which white noise can be generated.
+        silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000)
+
+        # Generate two distinct AudioSegment instances with 1 second of white noise.
+        signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+            silence)
+        noise = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+            silence)
+
+        # Extract samples.
+        signal_samples = signal.get_array_of_samples()
+        noise_samples = noise.get_array_of_samples()
+
+        # Test target SNR -Inf (noise expected).
+        mix_neg_inf = signal_processing.SignalProcessingUtils.MixSignals(
+            signal, noise, -np.Inf)
+        self.assertEqual(len(noise), len(mix_neg_inf))  # Check duration.
+        mix_neg_inf_samples = mix_neg_inf.get_array_of_samples()
+        self.assertTrue(  # Check samples.
+            all([x == y for x, y in zip(noise_samples, mix_neg_inf_samples)]))
+
+        # Test target SNR 0.0 (different data expected).
+        mix_0 = signal_processing.SignalProcessingUtils.MixSignals(
+            signal, noise, 0.0)
+        self.assertEqual(len(signal), len(mix_0))  # Check duration.
+        self.assertEqual(len(noise), len(mix_0))
+        mix_0_samples = mix_0.get_array_of_samples()
+        self.assertTrue(
+            any([x != y for x, y in zip(signal_samples, mix_0_samples)]))
+        self.assertTrue(
+            any([x != y for x, y in zip(noise_samples, mix_0_samples)]))
+
+        # Test target SNR +Inf (signal expected).
+        mix_pos_inf = signal_processing.SignalProcessingUtils.MixSignals(
+            signal, noise, np.Inf)
+        self.assertEqual(len(signal), len(mix_pos_inf))  # Check duration.
+        mix_pos_inf_samples = mix_pos_inf.get_array_of_samples()
+        self.assertTrue(  # Check samples.
+ all([x == y for x, y in zip(signal_samples, mix_pos_inf_samples)])) + + def testMixSignalsMinInfPower(self): + silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + silence) + + with self.assertRaises(exceptions.SignalProcessingException): + _ = signal_processing.SignalProcessingUtils.MixSignals( + signal, silence, 0.0) + + with self.assertRaises(exceptions.SignalProcessingException): + _ = signal_processing.SignalProcessingUtils.MixSignals( + silence, signal, 0.0) + + def testMixSignalNoiseDifferentLengths(self): + # Test signals. + shorter = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + pydub.AudioSegment.silent(duration=1000, frame_rate=8000)) + longer = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + pydub.AudioSegment.silent(duration=2000, frame_rate=8000)) + + # When the signal is shorter than the noise, the mix length always equals + # that of the signal regardless of whether padding is applied. + # No noise padding, length of signal less than that of noise. + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=shorter, + noise=longer, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + NO_PADDING) + self.assertEqual(len(shorter), len(mix)) + # With noise padding, length of signal less than that of noise. + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=shorter, + noise=longer, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + ZERO_PADDING) + self.assertEqual(len(shorter), len(mix)) + + # When the signal is longer than the noise, the mix length depends on + # whether padding is applied. + # No noise padding, length of signal greater than that of noise. + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + NO_PADDING) + self.assertEqual(len(shorter), len(mix)) + # With noise padding, length of signal greater than that of noise. + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + ZERO_PADDING) + self.assertEqual(len(longer), len(mix)) + + def testMixSignalNoisePaddingTypes(self): + # Test signals. + shorter = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + pydub.AudioSegment.silent(duration=1000, frame_rate=8000)) + longer = signal_processing.SignalProcessingUtils.GeneratePureTone( + pydub.AudioSegment.silent(duration=2000, frame_rate=8000), 440.0) + + # Zero padding: expect pure tone only in 1-2s. + mix_zero_pad = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + target_snr=-6, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding. + ZERO_PADDING) + + # Loop: expect pure tone plus noise in 1-2s. + mix_loop = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + target_snr=-6, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding.LOOP) + + def Energy(signal): + samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData( + signal).astype(np.float32) + return np.sum(samples * samples) + + e_mix_zero_pad = Energy(mix_zero_pad[-1000:]) + e_mix_loop = Energy(mix_loop[-1000:]) + self.assertLess(0, e_mix_zero_pad) + self.assertLess(e_mix_zero_pad, e_mix_loop) + + def testMixSignalSnr(self): + # Test signals. 
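+        # Bin arithmetic for the assertions below: 64 ms at 8 kHz gives 512
+        # samples, so the FFT bin width is 8000 / 512 = 15.625 Hz; the 250 Hz
+        # tone lands in bin 250 / 15.625 = 16 and the 3000 Hz tone in bin
+        # 3000 / 15.625 = 192.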
+ tone_low = signal_processing.SignalProcessingUtils.GeneratePureTone( + pydub.AudioSegment.silent(duration=64, frame_rate=8000), 250.0) + tone_high = signal_processing.SignalProcessingUtils.GeneratePureTone( + pydub.AudioSegment.silent(duration=64, frame_rate=8000), 3000.0) + + def ToneAmplitudes(mix): + """Returns the amplitude of the coefficients #16 and #192, which + correspond to the tones at 250 and 3k Hz respectively.""" + mix_fft = np.absolute( + signal_processing.SignalProcessingUtils.Fft(mix)) + return mix_fft[16], mix_fft[192] + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_low, noise=tone_high, target_snr=-6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_low, ampl_high) + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_high, noise=tone_low, target_snr=-6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_high, ampl_low) + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_low, noise=tone_high, target_snr=6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_high, ampl_low) + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_high, noise=tone_low, target_snr=6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_low, ampl_high) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py new file mode 100644 index 0000000000..69b3a1624e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py @@ -0,0 +1,446 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""APM module simulator. +""" + +import logging +import os + +from . import annotations +from . import data_access +from . import echo_path_simulation +from . import echo_path_simulation_factory +from . import eval_scores +from . import exceptions +from . import input_mixer +from . import input_signal_creator +from . import signal_processing +from . import test_data_generation + + +class ApmModuleSimulator(object): + """Audio processing module (APM) simulator class. 
+ """ + + _TEST_DATA_GENERATOR_CLASSES = ( + test_data_generation.TestDataGenerator.REGISTERED_CLASSES) + _EVAL_SCORE_WORKER_CLASSES = eval_scores.EvaluationScore.REGISTERED_CLASSES + + _PREFIX_APM_CONFIG = 'apmcfg-' + _PREFIX_CAPTURE = 'capture-' + _PREFIX_RENDER = 'render-' + _PREFIX_ECHO_SIMULATOR = 'echosim-' + _PREFIX_TEST_DATA_GEN = 'datagen-' + _PREFIX_TEST_DATA_GEN_PARAMS = 'datagen_params-' + _PREFIX_SCORE = 'score-' + + def __init__(self, + test_data_generator_factory, + evaluation_score_factory, + ap_wrapper, + evaluator, + external_vads=None): + if external_vads is None: + external_vads = {} + self._test_data_generator_factory = test_data_generator_factory + self._evaluation_score_factory = evaluation_score_factory + self._audioproc_wrapper = ap_wrapper + self._evaluator = evaluator + self._annotator = annotations.AudioAnnotationsExtractor( + annotations.AudioAnnotationsExtractor.VadType.ENERGY_THRESHOLD + | annotations.AudioAnnotationsExtractor.VadType.WEBRTC_COMMON_AUDIO + | annotations.AudioAnnotationsExtractor.VadType.WEBRTC_APM, + external_vads) + + # Init. + self._test_data_generator_factory.SetOutputDirectoryPrefix( + self._PREFIX_TEST_DATA_GEN_PARAMS) + self._evaluation_score_factory.SetScoreFilenamePrefix( + self._PREFIX_SCORE) + + # Properties for each run. + self._base_output_path = None + self._output_cache_path = None + self._test_data_generators = None + self._evaluation_score_workers = None + self._config_filepaths = None + self._capture_input_filepaths = None + self._render_input_filepaths = None + self._echo_path_simulator_class = None + + @classmethod + def GetPrefixApmConfig(cls): + return cls._PREFIX_APM_CONFIG + + @classmethod + def GetPrefixCapture(cls): + return cls._PREFIX_CAPTURE + + @classmethod + def GetPrefixRender(cls): + return cls._PREFIX_RENDER + + @classmethod + def GetPrefixEchoSimulator(cls): + return cls._PREFIX_ECHO_SIMULATOR + + @classmethod + def GetPrefixTestDataGenerator(cls): + return cls._PREFIX_TEST_DATA_GEN + + @classmethod + def GetPrefixTestDataGeneratorParameters(cls): + return cls._PREFIX_TEST_DATA_GEN_PARAMS + + @classmethod + def GetPrefixScore(cls): + return cls._PREFIX_SCORE + + def Run(self, + config_filepaths, + capture_input_filepaths, + test_data_generator_names, + eval_score_names, + output_dir, + render_input_filepaths=None, + echo_path_simulator_name=( + echo_path_simulation.NoEchoPathSimulator.NAME)): + """Runs the APM simulation. + + Initializes paths and required instances, then runs all the simulations. + The render input can be optionally added. If added, the number of capture + input audio tracks and the number of render input audio tracks have to be + equal. The two lists are used to form pairs of capture and render input. + + Args: + config_filepaths: set of APM configuration files to test. + capture_input_filepaths: set of capture input audio track files to test. + test_data_generator_names: set of test data generator names to test. + eval_score_names: set of evaluation score names to test. + output_dir: base path to the output directory for wav files and outcomes. + render_input_filepaths: set of render input audio track files to test. + echo_path_simulator_name: name of the echo path simulator to use when + render input is provided. 
+ """ + assert render_input_filepaths is None or ( + len(capture_input_filepaths) == len(render_input_filepaths)), ( + 'render input set size not matching input set size') + assert render_input_filepaths is None or echo_path_simulator_name in ( + echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES), ( + 'invalid echo path simulator') + self._base_output_path = os.path.abspath(output_dir) + + # Output path used to cache the data shared across simulations. + self._output_cache_path = os.path.join(self._base_output_path, + '_cache') + + # Instance test data generators. + self._test_data_generators = [ + self._test_data_generator_factory.GetInstance( + test_data_generators_class=( + self._TEST_DATA_GENERATOR_CLASSES[name])) + for name in (test_data_generator_names) + ] + + # Instance evaluation score workers. + self._evaluation_score_workers = [ + self._evaluation_score_factory.GetInstance( + evaluation_score_class=self._EVAL_SCORE_WORKER_CLASSES[name]) + for (name) in eval_score_names + ] + + # Set APM configuration file paths. + self._config_filepaths = self._CreatePathsCollection(config_filepaths) + + # Set probing signal file paths. + if render_input_filepaths is None: + # Capture input only. + self._capture_input_filepaths = self._CreatePathsCollection( + capture_input_filepaths) + self._render_input_filepaths = None + else: + # Set both capture and render input signals. + self._SetTestInputSignalFilePaths(capture_input_filepaths, + render_input_filepaths) + + # Set the echo path simulator class. + self._echo_path_simulator_class = ( + echo_path_simulation.EchoPathSimulator. + REGISTERED_CLASSES[echo_path_simulator_name]) + + self._SimulateAll() + + def _SimulateAll(self): + """Runs all the simulations. + + Iterates over the combinations of APM configurations, probing signals, and + test data generators. This method is mainly responsible for the creation of + the cache and output directories required in order to call _Simulate(). + """ + without_render_input = self._render_input_filepaths is None + + # Try different APM config files. + for config_name in self._config_filepaths: + config_filepath = self._config_filepaths[config_name] + + # Try different capture-render pairs. + for capture_input_name in self._capture_input_filepaths: + # Output path for the capture signal annotations. + capture_annotations_cache_path = os.path.join( + self._output_cache_path, + self._PREFIX_CAPTURE + capture_input_name) + data_access.MakeDirectory(capture_annotations_cache_path) + + # Capture. + capture_input_filepath = self._capture_input_filepaths[ + capture_input_name] + if not os.path.exists(capture_input_filepath): + # If the input signal file does not exist, try to create using the + # available input signal creators. + self._CreateInputSignal(capture_input_filepath) + assert os.path.exists(capture_input_filepath) + self._ExtractCaptureAnnotations( + capture_input_filepath, capture_annotations_cache_path) + + # Render and simulated echo path (optional). + render_input_filepath = None if without_render_input else ( + self._render_input_filepaths[capture_input_name]) + render_input_name = '(none)' if without_render_input else ( + self._ExtractFileName(render_input_filepath)) + echo_path_simulator = (echo_path_simulation_factory. + EchoPathSimulatorFactory.GetInstance( + self._echo_path_simulator_class, + render_input_filepath)) + + # Try different test data generators. 
+                for test_data_generators in self._test_data_generators:
+                    logging.info(
+                        'APM config preset: <%s>, capture: <%s>, render: <%s>, '
+                        'test data generator: <%s>, echo simulator: <%s>',
+                        config_name, capture_input_name, render_input_name,
+                        test_data_generators.NAME, echo_path_simulator.NAME)
+
+                    # Output path for the generated test data.
+                    test_data_cache_path = os.path.join(
+                        capture_annotations_cache_path,
+                        self._PREFIX_TEST_DATA_GEN + test_data_generators.NAME)
+                    data_access.MakeDirectory(test_data_cache_path)
+                    logging.debug('test data cache path: <%s>',
+                                  test_data_cache_path)
+
+                    # Output path for the echo simulator and APM input mixer output.
+                    echo_test_data_cache_path = os.path.join(
+                        test_data_cache_path,
+                        'echosim-{}'.format(echo_path_simulator.NAME))
+                    data_access.MakeDirectory(echo_test_data_cache_path)
+                    logging.debug('echo test data cache path: <%s>',
+                                  echo_test_data_cache_path)
+
+                    # Full output path.
+                    output_path = os.path.join(
+                        self._base_output_path,
+                        self._PREFIX_APM_CONFIG + config_name,
+                        self._PREFIX_CAPTURE + capture_input_name,
+                        self._PREFIX_RENDER + render_input_name,
+                        self._PREFIX_ECHO_SIMULATOR + echo_path_simulator.NAME,
+                        self._PREFIX_TEST_DATA_GEN + test_data_generators.NAME)
+                    data_access.MakeDirectory(output_path)
+                    logging.debug('output path: <%s>', output_path)
+
+                    self._Simulate(test_data_generators,
+                                   capture_input_filepath,
+                                   render_input_filepath, test_data_cache_path,
+                                   echo_test_data_cache_path, output_path,
+                                   config_filepath, echo_path_simulator)
+
+    @staticmethod
+    def _CreateInputSignal(input_signal_filepath):
+        """Creates a missing input signal file.
+
+        The file name is parsed to extract input signal creator and params. If a
+        creator is matched and the parameters are valid, a new signal is generated
+        and written in `input_signal_filepath`.
+
+        Args:
+            input_signal_filepath: Path to the input signal audio file to write.
+
+        Raises:
+            InputSignalCreatorException
+        """
+        filename = os.path.splitext(
+            os.path.split(input_signal_filepath)[-1])[0]
+        filename_parts = filename.split('-')
+
+        if len(filename_parts) < 2:
+            raise exceptions.InputSignalCreatorException(
+                'Cannot parse input signal file name')
+
+        signal, metadata = input_signal_creator.InputSignalCreator.Create(
+            filename_parts[0], filename_parts[1].split('_'))
+
+        signal_processing.SignalProcessingUtils.SaveWav(
+            input_signal_filepath, signal)
+        data_access.Metadata.SaveFileMetadata(input_signal_filepath, metadata)
+
+    def _ExtractCaptureAnnotations(self,
+                                   input_filepath,
+                                   output_path,
+                                   annotation_name=""):
+        self._annotator.Extract(input_filepath)
+        self._annotator.Save(output_path, annotation_name)
+
+    def _Simulate(self, test_data_generators, clean_capture_input_filepath,
+                  render_input_filepath, test_data_cache_path,
+                  echo_test_data_cache_path, output_path, config_filepath,
+                  echo_path_simulator):
+        """Runs a single set of simulations.
+
+        Simulates a given combination of APM configuration, probing signal, and
+        test data generator. It iterates over the test data generator
+        internal configurations.
+
+        Args:
+            test_data_generators: TestDataGenerator instance.
+            clean_capture_input_filepath: capture input audio track file to be
+                                          processed by a test data generator and
+                                          not affected by echo.
+            render_input_filepath: render input audio track file to test.
+            test_data_cache_path: path for the generated test audio track files.
+            echo_test_data_cache_path: path for the echo simulator.
+            output_path: base output path for the test data generator.
+ config_filepath: APM configuration file to test. + echo_path_simulator: EchoPathSimulator instance. + """ + # Generate pairs of noisy input and reference signal files. + test_data_generators.Generate( + input_signal_filepath=clean_capture_input_filepath, + test_data_cache_path=test_data_cache_path, + base_output_path=output_path) + + # Extract metadata linked to the clean input file (if any). + apm_input_metadata = None + try: + apm_input_metadata = data_access.Metadata.LoadFileMetadata( + clean_capture_input_filepath) + except IOError as e: + apm_input_metadata = {} + apm_input_metadata['test_data_gen_name'] = test_data_generators.NAME + apm_input_metadata['test_data_gen_config'] = None + + # For each test data pair, simulate a call and evaluate. + for config_name in test_data_generators.config_names: + logging.info(' - test data generator config: <%s>', config_name) + apm_input_metadata['test_data_gen_config'] = config_name + + # Paths to the test data generator output. + # Note that the reference signal does not depend on the render input + # which is optional. + noisy_capture_input_filepath = ( + test_data_generators.noisy_signal_filepaths[config_name]) + reference_signal_filepath = ( + test_data_generators.reference_signal_filepaths[config_name]) + + # Output path for the evaluation (e.g., APM output file). + evaluation_output_path = test_data_generators.apm_output_paths[ + config_name] + + # Paths to the APM input signals. + echo_path_filepath = echo_path_simulator.Simulate( + echo_test_data_cache_path) + apm_input_filepath = input_mixer.ApmInputMixer.Mix( + echo_test_data_cache_path, noisy_capture_input_filepath, + echo_path_filepath) + + # Extract annotations for the APM input mix. + apm_input_basepath, apm_input_filename = os.path.split( + apm_input_filepath) + self._ExtractCaptureAnnotations( + apm_input_filepath, apm_input_basepath, + os.path.splitext(apm_input_filename)[0] + '-') + + # Simulate a call using APM. + self._audioproc_wrapper.Run( + config_filepath=config_filepath, + capture_input_filepath=apm_input_filepath, + render_input_filepath=render_input_filepath, + output_path=evaluation_output_path) + + try: + # Evaluate. + self._evaluator.Run( + evaluation_score_workers=self._evaluation_score_workers, + apm_input_metadata=apm_input_metadata, + apm_output_filepath=self._audioproc_wrapper. + output_filepath, + reference_input_filepath=reference_signal_filepath, + render_input_filepath=render_input_filepath, + output_path=evaluation_output_path, + ) + + # Save simulation metadata. + data_access.Metadata.SaveAudioTestDataPaths( + output_path=evaluation_output_path, + clean_capture_input_filepath=clean_capture_input_filepath, + echo_free_capture_filepath=noisy_capture_input_filepath, + echo_filepath=echo_path_filepath, + render_filepath=render_input_filepath, + capture_filepath=apm_input_filepath, + apm_output_filepath=self._audioproc_wrapper. + output_filepath, + apm_reference_filepath=reference_signal_filepath, + apm_config_filepath=config_filepath, + ) + except exceptions.EvaluationScoreException as e: + logging.warning('the evaluation failed: %s', e.message) + continue + + def _SetTestInputSignalFilePaths(self, capture_input_filepaths, + render_input_filepaths): + """Sets input and render input file paths collections. + + Pairs the input and render input files by storing the file paths into two + collections. The key is the file name of the input file. + + Args: + capture_input_filepaths: list of file paths. + render_input_filepaths: list of file paths. 
+ """ + self._capture_input_filepaths = {} + self._render_input_filepaths = {} + assert len(capture_input_filepaths) == len(render_input_filepaths) + for capture_input_filepath, render_input_filepath in zip( + capture_input_filepaths, render_input_filepaths): + name = self._ExtractFileName(capture_input_filepath) + self._capture_input_filepaths[name] = os.path.abspath( + capture_input_filepath) + self._render_input_filepaths[name] = os.path.abspath( + render_input_filepath) + + @classmethod + def _CreatePathsCollection(cls, filepaths): + """Creates a collection of file paths. + + Given a list of file paths, makes a collection with one item for each file + path. The value is absolute path, the key is the file name without + extenstion. + + Args: + filepaths: list of file paths. + + Returns: + A dict. + """ + filepaths_collection = {} + for filepath in filepaths: + name = cls._ExtractFileName(filepath) + filepaths_collection[name] = os.path.abspath(filepath) + return filepaths_collection + + @classmethod + def _ExtractFileName(cls, filepath): + return os.path.splitext(os.path.split(filepath)[-1])[0] diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py new file mode 100644 index 0000000000..78ca17f589 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py @@ -0,0 +1,203 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the simulation module. +""" + +import logging +import os +import shutil +import tempfile +import unittest + +import mock +import pydub + +from . import audioproc_wrapper +from . import eval_scores_factory +from . import evaluation +from . import external_vad +from . import signal_processing +from . import simulation +from . import test_data_generation_factory + + +class TestApmModuleSimulator(unittest.TestCase): + """Unit tests for the ApmModuleSimulator class. + """ + + def setUp(self): + """Create temporary folders and fake audio track.""" + self._output_path = tempfile.mkdtemp() + self._tmp_path = tempfile.mkdtemp() + + silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + fake_signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + silence) + self._fake_audio_track_path = os.path.join(self._output_path, + 'fake.wav') + signal_processing.SignalProcessingUtils.SaveWav( + self._fake_audio_track_path, fake_signal) + + def tearDown(self): + """Recursively delete temporary folders.""" + shutil.rmtree(self._output_path) + shutil.rmtree(self._tmp_path) + + def testSimulation(self): + # Instance dependencies to mock and inject. + ap_wrapper = audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper.DEFAULT_APM_SIMULATOR_BIN_PATH) + evaluator = evaluation.ApmModuleEvaluator() + ap_wrapper.Run = mock.MagicMock(name='Run') + evaluator.Run = mock.MagicMock(name='Run') + + # Instance non-mocked dependencies. 
+ test_data_generator_factory = ( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)) + evaluation_score_factory = eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join(os.path.dirname(__file__), + 'fake_polqa'), + echo_metric_tool_bin_path=None) + + # Instance simulator. + simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=test_data_generator_factory, + evaluation_score_factory=evaluation_score_factory, + ap_wrapper=ap_wrapper, + evaluator=evaluator, + external_vads={ + 'fake': + external_vad.ExternalVad( + os.path.join(os.path.dirname(__file__), + 'fake_external_vad.py'), 'fake') + }) + + # What to simulate. + config_files = ['apm_configs/default.json'] + input_files = [self._fake_audio_track_path] + test_data_generators = ['identity', 'white_noise'] + eval_scores = ['audio_level_mean', 'polqa'] + + # Run all simulations. + simulator.Run(config_filepaths=config_files, + capture_input_filepaths=input_files, + test_data_generator_names=test_data_generators, + eval_score_names=eval_scores, + output_dir=self._output_path) + + # Check. + # TODO(alessiob): Once the TestDataGenerator classes can be configured by + # the client code (e.g., number of SNR pairs for the white noise test data + # generator), the exact number of calls to ap_wrapper.Run and evaluator.Run + # is known; use that with assertEqual. + min_number_of_simulations = len(config_files) * len(input_files) * len( + test_data_generators) + self.assertGreaterEqual(len(ap_wrapper.Run.call_args_list), + min_number_of_simulations) + self.assertGreaterEqual(len(evaluator.Run.call_args_list), + min_number_of_simulations) + + def testInputSignalCreation(self): + # Instance simulator. + simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)), + evaluation_score_factory=( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join(os.path.dirname(__file__), + 'fake_polqa'), + echo_metric_tool_bin_path=None)), + ap_wrapper=audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper. + DEFAULT_APM_SIMULATOR_BIN_PATH), + evaluator=evaluation.ApmModuleEvaluator()) + + # Inexistent input files to be silently created. + input_files = [ + os.path.join(self._tmp_path, 'pure_tone-440_1000.wav'), + os.path.join(self._tmp_path, 'pure_tone-1000_500.wav'), + ] + self.assertFalse( + any([os.path.exists(input_file) for input_file in (input_files)])) + + # The input files are created during the simulation. + simulator.Run(config_filepaths=['apm_configs/default.json'], + capture_input_filepaths=input_files, + test_data_generator_names=['identity'], + eval_score_names=['audio_level_peak'], + output_dir=self._output_path) + self.assertTrue( + all([os.path.exists(input_file) for input_file in (input_files)])) + + def testPureToneGenerationWithTotalHarmonicDistorsion(self): + logging.warning = mock.MagicMock(name='warning') + + # Instance simulator. 
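+        # The simulator below uses real factories and a real evaluator, so the
+        # warning checked further down is produced by the actual evaluation
+        # path rather than by a mock.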
+ simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)), + evaluation_score_factory=( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join(os.path.dirname(__file__), + 'fake_polqa'), + echo_metric_tool_bin_path=None)), + ap_wrapper=audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper. + DEFAULT_APM_SIMULATOR_BIN_PATH), + evaluator=evaluation.ApmModuleEvaluator()) + + # What to simulate. + config_files = ['apm_configs/default.json'] + input_files = [os.path.join(self._tmp_path, 'pure_tone-440_1000.wav')] + eval_scores = ['thd'] + + # Should work. + simulator.Run(config_filepaths=config_files, + capture_input_filepaths=input_files, + test_data_generator_names=['identity'], + eval_score_names=eval_scores, + output_dir=self._output_path) + self.assertFalse(logging.warning.called) + + # Warning expected. + simulator.Run( + config_filepaths=config_files, + capture_input_filepaths=input_files, + test_data_generator_names=['white_noise'], # Not allowed with THD. + eval_score_names=eval_scores, + output_dir=self._output_path) + logging.warning.assert_called_with('the evaluation failed: %s', ( + 'The THD score cannot be used with any test data generator other than ' + '"identity"')) + + # # Init. + # generator = test_data_generation.IdentityTestDataGenerator('tmp') + # input_signal_filepath = os.path.join( + # self._test_data_cache_path, 'pure_tone-440_1000.wav') + + # # Check that the input signal is generated. + # self.assertFalse(os.path.exists(input_signal_filepath)) + # generator.Generate( + # input_signal_filepath=input_signal_filepath, + # test_data_cache_path=self._test_data_cache_path, + # base_output_path=self._base_output_path) + # self.assertTrue(os.path.exists(input_signal_filepath)) + + # # Check input signal properties. + # input_signal = signal_processing.SignalProcessingUtils.LoadWav( + # input_signal_filepath) + # self.assertEqual(1000, len(input_signal)) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/sound_level.cc b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/sound_level.cc new file mode 100644 index 0000000000..1f24d9d370 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/sound_level.cc @@ -0,0 +1,127 @@ +// Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
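+
+// Command-line tool that measures the frame-by-frame peak level of a mono wav
+// file, applies attack/decay smoothing, and writes the levels as raw binary
+// floats next to a small config file. Illustrative invocation (the binary
+// name is an example; the flags are declared below, shown with their default
+// values):
+//   ./sound_level -i capture.wav -oc config.json -ol levels.bin -a 5 -d 20 -f 10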
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <fstream>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/wav_file.h"
+#include "rtc_base/logging.h"
+
+ABSL_FLAG(std::string, i, "", "Input wav file");
+ABSL_FLAG(std::string, oc, "", "Config output file");
+ABSL_FLAG(std::string, ol, "", "Levels output file");
+ABSL_FLAG(float, a, 5.f, "Attack (ms)");
+ABSL_FLAG(float, d, 20.f, "Decay (ms)");
+ABSL_FLAG(int, f, 10, "Frame length (ms)");
+
+namespace webrtc {
+namespace test {
+namespace {
+
+constexpr int kMaxSampleRate = 48000;
+constexpr uint8_t kMaxFrameLenMs = 30;
+constexpr size_t kMaxFrameLen = kMaxFrameLenMs * kMaxSampleRate / 1000;
+
+const double kOneDbReduction = DbToRatio(-1.0);
+
+int main(int argc, char* argv[]) {
+  absl::ParseCommandLine(argc, argv);
+  // Check parameters.
+  if (absl::GetFlag(FLAGS_f) < 1 || absl::GetFlag(FLAGS_f) > kMaxFrameLenMs) {
+    // Cast so that the uint8_t limit is printed as a number, not a char.
+    RTC_LOG(LS_ERROR) << "Invalid frame length (min: 1, max: "
+                      << static_cast<int>(kMaxFrameLenMs) << ")";
+    return 1;
+  }
+  if (absl::GetFlag(FLAGS_a) < 0 || absl::GetFlag(FLAGS_d) < 0) {
+    RTC_LOG(LS_ERROR) << "Attack and decay must be non-negative";
+    return 1;
+  }
+
+  // Open wav input file and check properties.
+  const std::string input_file = absl::GetFlag(FLAGS_i);
+  const std::string config_output_file = absl::GetFlag(FLAGS_oc);
+  const std::string levels_output_file = absl::GetFlag(FLAGS_ol);
+  WavReader wav_reader(input_file);
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() > kMaxSampleRate) {
+    RTC_LOG(LS_ERROR) << "Beyond maximum sample rate (" << kMaxSampleRate
+                      << ")";
+    return 1;
+  }
+
+  // Map from milliseconds to samples.
+  const size_t audio_frame_length = rtc::CheckedDivExact(
+      absl::GetFlag(FLAGS_f) * wav_reader.sample_rate(), 1000);
+  // One-pole smoothing coefficient: raising the per-frame factor to the
+  // number of frames in `c` ms yields a 1 dB change over that interval.
+  auto time_const = [](double c) {
+    return std::pow(kOneDbReduction, absl::GetFlag(FLAGS_f) / c);
+  };
+  const float attack =
+      absl::GetFlag(FLAGS_a) == 0.0 ? 0.0 : time_const(absl::GetFlag(FLAGS_a));
+  const float decay =
+      absl::GetFlag(FLAGS_d) == 0.0 ? 0.0 : time_const(absl::GetFlag(FLAGS_d));
+
+  // Write config to file.
+  std::ofstream out_config(config_output_file);
+  out_config << "{"
+                "'frame_len_ms': "
+             << absl::GetFlag(FLAGS_f)
+             << ", "
+                "'attack_ms': "
+             << absl::GetFlag(FLAGS_a)
+             << ", "
+                "'decay_ms': "
+             << absl::GetFlag(FLAGS_d) << "}\n";
+  out_config.close();
+
+  // Measure level frame-by-frame.
+  std::ofstream out_levels(levels_output_file, std::ofstream::binary);
+  std::array<int16_t, kMaxFrameLen> samples;
+  float level_prev = 0.f;
+  while (true) {
+    // Process frame.
+    const auto read_samples =
+        wav_reader.ReadSamples(audio_frame_length, samples.data());
+    if (read_samples < audio_frame_length)
+      break;  // EOF.
+
+    // Frame peak level.
+    std::transform(samples.begin(), samples.begin() + audio_frame_length,
+                   samples.begin(), [](int16_t s) { return std::abs(s); });
+    const int16_t peak_level = *std::max_element(
+        samples.cbegin(), samples.cbegin() + audio_frame_length);
+    const float level_curr = static_cast<float>(peak_level) / 32768.f;
+
+    // Temporal smoothing.
+    auto smooth = [&level_prev, &level_curr](float c) {
+      return (1.0 - c) * level_curr + c * level_prev;
+    };
+    level_prev = smooth(level_curr > level_prev ? attack : decay);
+
+    // Write output.
+    out_levels.write(reinterpret_cast<const char*>(&level_prev),
+                     sizeof(float));
+  }
+  out_levels.close();
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
new file mode 100644
index 0000000000..7e86faccec
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py
@@ -0,0 +1,526 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+"""Test data generators producing signal pairs intended to be used to
+test the APM module. Each pair consists of a noisy input and a reference signal.
+The former is used as APM input and it is generated by adding noise to a
+clean audio track. The reference is the expected APM output.
+
+Throughout this file, the following naming convention is used:
+  - input signal: the clean signal (e.g., speech),
+  - noise signal: the noise to be summed up to the input signal (e.g., white
+    noise, Gaussian noise),
+  - noisy signal: input + noise.
+The noise signal may or may not be a function of the clean signal. For
+instance, white noise is independently generated, whereas reverberation is
+obtained by convolving the input signal with an impulse response.
+"""
+
+import logging
+import os
+import shutil
+import sys
+
+try:
+    import scipy.io
+except ImportError:
+    logging.critical('Cannot import the third-party Python package scipy')
+    sys.exit(1)
+
+from . import data_access
+from . import exceptions
+from . import signal_processing
+
+
+class TestDataGenerator(object):
+    """Abstract class responsible for the generation of noisy signals.
+
+    Given a clean signal, it generates two streams named noisy signal and
+    reference. The former is the clean signal deteriorated by the noise source,
+    the latter goes through the same deterioration process, but more "gently".
+    Noisy signal and reference are produced so that the reference is the signal
+    expected at the output of the APM module when the latter is fed with the noisy
+    signal.
+
+    A test data generator generates one or more pairs.
+    """
+
+    NAME = None
+    REGISTERED_CLASSES = {}
+
+    def __init__(self, output_directory_prefix):
+        self._output_directory_prefix = output_directory_prefix
+        # Init dictionaries with one entry for each test data generator
+        # configuration (e.g., different SNRs).
+        # Noisy audio track files (stored separately in a cache folder).
+        self._noisy_signal_filepaths = None
+        # Path to be used for the APM simulation output files.
+        self._apm_output_paths = None
+        # Reference audio track files (stored separately in a cache folder).
+        self._reference_signal_filepaths = None
+        self.Clear()
+
+    @classmethod
+    def RegisterClass(cls, class_to_register):
+        """Registers a TestDataGenerator implementation.
+
+        Decorator to automatically register the classes that extend
+        TestDataGenerator.
+ Example usage: + + @TestDataGenerator.RegisterClass + class IdentityGenerator(TestDataGenerator): + pass + """ + cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register + return class_to_register + + @property + def config_names(self): + return self._noisy_signal_filepaths.keys() + + @property + def noisy_signal_filepaths(self): + return self._noisy_signal_filepaths + + @property + def apm_output_paths(self): + return self._apm_output_paths + + @property + def reference_signal_filepaths(self): + return self._reference_signal_filepaths + + def Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + """Generates a set of noisy input and reference audiotrack file pairs. + + This method initializes an empty set of pairs and calls the _Generate() + method implemented in a concrete class. + + Args: + input_signal_filepath: path to the clean input audio track file. + test_data_cache_path: path to the cache of the generated audio track + files. + base_output_path: base path where output is written. + """ + self.Clear() + self._Generate(input_signal_filepath, test_data_cache_path, + base_output_path) + + def Clear(self): + """Clears the generated output path dictionaries. + """ + self._noisy_signal_filepaths = {} + self._apm_output_paths = {} + self._reference_signal_filepaths = {} + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + """Abstract method to be implemented in each concrete class. + """ + raise NotImplementedError() + + def _AddNoiseSnrPairs(self, base_output_path, noisy_mix_filepaths, + snr_value_pairs): + """Adds noisy-reference signal pairs. + + Args: + base_output_path: noisy tracks base output path. + noisy_mix_filepaths: nested dictionary of noisy signal paths organized + by noisy track name and SNR level. + snr_value_pairs: list of SNR pairs. + """ + for noise_track_name in noisy_mix_filepaths: + for snr_noisy, snr_refence in snr_value_pairs: + config_name = '{0}_{1:d}_{2:d}_SNR'.format( + noise_track_name, snr_noisy, snr_refence) + output_path = self._MakeDir(base_output_path, config_name) + self._AddNoiseReferenceFilesPair( + config_name=config_name, + noisy_signal_filepath=noisy_mix_filepaths[noise_track_name] + [snr_noisy], + reference_signal_filepath=noisy_mix_filepaths[ + noise_track_name][snr_refence], + output_path=output_path) + + def _AddNoiseReferenceFilesPair(self, config_name, noisy_signal_filepath, + reference_signal_filepath, output_path): + """Adds one noisy-reference signal pair. + + Args: + config_name: name of the APM configuration. + noisy_signal_filepath: path to noisy audio track file. + reference_signal_filepath: path to reference audio track file. + output_path: APM output path. + """ + assert config_name not in self._noisy_signal_filepaths + self._noisy_signal_filepaths[config_name] = os.path.abspath( + noisy_signal_filepath) + self._apm_output_paths[config_name] = os.path.abspath(output_path) + self._reference_signal_filepaths[config_name] = os.path.abspath( + reference_signal_filepath) + + def _MakeDir(self, base_output_path, test_data_generator_config_name): + output_path = os.path.join( + base_output_path, + self._output_directory_prefix + test_data_generator_config_name) + data_access.MakeDirectory(output_path) + return output_path + + +@TestDataGenerator.RegisterClass +class IdentityTestDataGenerator(TestDataGenerator): + """Generator that adds no noise. + + Both the noisy and the reference signals are the input signal. 
+ """ + + NAME = 'identity' + + def __init__(self, output_directory_prefix, copy_with_identity): + TestDataGenerator.__init__(self, output_directory_prefix) + self._copy_with_identity = copy_with_identity + + @property + def copy_with_identity(self): + return self._copy_with_identity + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + config_name = 'default' + output_path = self._MakeDir(base_output_path, config_name) + + if self._copy_with_identity: + input_signal_filepath_new = os.path.join( + test_data_cache_path, + os.path.split(input_signal_filepath)[1]) + logging.info('copying ' + input_signal_filepath + ' to ' + + (input_signal_filepath_new)) + shutil.copy(input_signal_filepath, input_signal_filepath_new) + input_signal_filepath = input_signal_filepath_new + + self._AddNoiseReferenceFilesPair( + config_name=config_name, + noisy_signal_filepath=input_signal_filepath, + reference_signal_filepath=input_signal_filepath, + output_path=output_path) + + +@TestDataGenerator.RegisterClass +class WhiteNoiseTestDataGenerator(TestDataGenerator): + """Generator that adds white noise. + """ + + NAME = 'white_noise' + + # Each pair indicates the clean vs. noisy and reference vs. noisy SNRs. + # The reference (second value of each pair) always has a lower amount of noise + # - i.e., the SNR is 10 dB higher. + _SNR_VALUE_PAIRS = [ + [20, 30], # Smallest noise. + [10, 20], + [5, 15], + [0, 10], # Largest noise. + ] + + _NOISY_SIGNAL_FILENAME_TEMPLATE = 'noise_{0:d}_SNR.wav' + + def __init__(self, output_directory_prefix): + TestDataGenerator.__init__(self, output_directory_prefix) + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + # Load the input signal. + input_signal = signal_processing.SignalProcessingUtils.LoadWav( + input_signal_filepath) + + # Create the noise track. + noise_signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + input_signal) + + # Create the noisy mixes (once for each unique SNR value). + noisy_mix_filepaths = {} + snr_values = set( + [snr for pair in self._SNR_VALUE_PAIRS for snr in pair]) + for snr in snr_values: + noisy_signal_filepath = os.path.join( + test_data_cache_path, + self._NOISY_SIGNAL_FILENAME_TEMPLATE.format(snr)) + + # Create and save if not done. + if not os.path.exists(noisy_signal_filepath): + # Create noisy signal. + noisy_signal = signal_processing.SignalProcessingUtils.MixSignals( + input_signal, noise_signal, snr) + + # Save. + signal_processing.SignalProcessingUtils.SaveWav( + noisy_signal_filepath, noisy_signal) + + # Add file to the collection of mixes. + noisy_mix_filepaths[snr] = noisy_signal_filepath + + # Add all the noisy-reference signal pairs. + for snr_noisy, snr_refence in self._SNR_VALUE_PAIRS: + config_name = '{0:d}_{1:d}_SNR'.format(snr_noisy, snr_refence) + output_path = self._MakeDir(base_output_path, config_name) + self._AddNoiseReferenceFilesPair( + config_name=config_name, + noisy_signal_filepath=noisy_mix_filepaths[snr_noisy], + reference_signal_filepath=noisy_mix_filepaths[snr_refence], + output_path=output_path) + + +# TODO(alessiob): remove comment when class implemented. +# @TestDataGenerator.RegisterClass +class NarrowBandNoiseTestDataGenerator(TestDataGenerator): + """Generator that adds narrow-band noise. 
+ """ + + NAME = 'narrow_band_noise' + + def __init__(self, output_directory_prefix): + TestDataGenerator.__init__(self, output_directory_prefix) + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + # TODO(alessiob): implement. + pass + + +@TestDataGenerator.RegisterClass +class AdditiveNoiseTestDataGenerator(TestDataGenerator): + """Generator that adds noise loops. + + This generator uses all the wav files in a given path (default: noise_tracks/) + and mixes them to the clean speech with different target SNRs (hard-coded). + """ + + NAME = 'additive_noise' + _NOISY_SIGNAL_FILENAME_TEMPLATE = '{0}_{1:d}_SNR.wav' + + DEFAULT_NOISE_TRACKS_PATH = os.path.join(os.path.dirname(__file__), + os.pardir, 'noise_tracks') + + # TODO(alessiob): Make the list of SNR pairs customizable. + # Each pair indicates the clean vs. noisy and reference vs. noisy SNRs. + # The reference (second value of each pair) always has a lower amount of noise + # - i.e., the SNR is 10 dB higher. + _SNR_VALUE_PAIRS = [ + [20, 30], # Smallest noise. + [10, 20], + [5, 15], + [0, 10], # Largest noise. + ] + + def __init__(self, output_directory_prefix, noise_tracks_path): + TestDataGenerator.__init__(self, output_directory_prefix) + self._noise_tracks_path = noise_tracks_path + self._noise_tracks_file_names = [ + n for n in os.listdir(self._noise_tracks_path) + if n.lower().endswith('.wav') + ] + if len(self._noise_tracks_file_names) == 0: + raise exceptions.InitializationException( + 'No wav files found in the noise tracks path %s' % + (self._noise_tracks_path)) + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + """Generates test data pairs using environmental noise. + + For each noise track and pair of SNR values, the following two audio tracks + are created: the noisy signal and the reference signal. The former is + obtained by mixing the (clean) input signal to the corresponding noise + track enforcing the target SNR. + """ + # Init. + snr_values = set( + [snr for pair in self._SNR_VALUE_PAIRS for snr in pair]) + + # Load the input signal. + input_signal = signal_processing.SignalProcessingUtils.LoadWav( + input_signal_filepath) + + noisy_mix_filepaths = {} + for noise_track_filename in self._noise_tracks_file_names: + # Load the noise track. + noise_track_name, _ = os.path.splitext(noise_track_filename) + noise_track_filepath = os.path.join(self._noise_tracks_path, + noise_track_filename) + if not os.path.exists(noise_track_filepath): + logging.error('cannot find the <%s> noise track', + noise_track_filename) + raise exceptions.FileNotFoundError() + + noise_signal = signal_processing.SignalProcessingUtils.LoadWav( + noise_track_filepath) + + # Create the noisy mixes (once for each unique SNR value). + noisy_mix_filepaths[noise_track_name] = {} + for snr in snr_values: + noisy_signal_filepath = os.path.join( + test_data_cache_path, + self._NOISY_SIGNAL_FILENAME_TEMPLATE.format( + noise_track_name, snr)) + + # Create and save if not done. + if not os.path.exists(noisy_signal_filepath): + # Create noisy signal. + noisy_signal = signal_processing.SignalProcessingUtils.MixSignals( + input_signal, + noise_signal, + snr, + pad_noise=signal_processing.SignalProcessingUtils. + MixPadding.LOOP) + + # Save. + signal_processing.SignalProcessingUtils.SaveWav( + noisy_signal_filepath, noisy_signal) + + # Add file to the collection of mixes. + noisy_mix_filepaths[noise_track_name][ + snr] = noisy_signal_filepath + + # Add all the noise-SNR pairs. 
+ self._AddNoiseSnrPairs(base_output_path, noisy_mix_filepaths, + self._SNR_VALUE_PAIRS) + + +@TestDataGenerator.RegisterClass +class ReverberationTestDataGenerator(TestDataGenerator): + """Generator that adds reverberation noise. + + TODO(alessiob): Make this class more generic since the impulse response can be + anything (not just reverberation); call it e.g., + ConvolutionalNoiseTestDataGenerator. + """ + + NAME = 'reverberation' + + _IMPULSE_RESPONSES = { + 'lecture': 'air_binaural_lecture_0_0_1.mat', # Long echo. + 'booth': 'air_binaural_booth_0_0_1.mat', # Short echo. + } + _MAX_IMPULSE_RESPONSE_LENGTH = None + + # Each pair indicates the clean vs. noisy and reference vs. noisy SNRs. + # The reference (second value of each pair) always has a lower amount of noise + # - i.e., the SNR is 5 dB higher. + _SNR_VALUE_PAIRS = [ + [3, 8], # Smallest noise. + [-3, 2], # Largest noise. + ] + + _NOISE_TRACK_FILENAME_TEMPLATE = '{0}.wav' + _NOISY_SIGNAL_FILENAME_TEMPLATE = '{0}_{1:d}_SNR.wav' + + def __init__(self, output_directory_prefix, aechen_ir_database_path): + TestDataGenerator.__init__(self, output_directory_prefix) + self._aechen_ir_database_path = aechen_ir_database_path + + def _Generate(self, input_signal_filepath, test_data_cache_path, + base_output_path): + """Generates test data pairs using reverberation noise. + + For each impulse response, one noise track is created. For each impulse + response and pair of SNR values, the following 2 audio tracks are + created: the noisy signal and the reference signal. The former is + obtained by mixing the (clean) input signal to the corresponding noise + track enforcing the target SNR. + """ + # Init. + snr_values = set( + [snr for pair in self._SNR_VALUE_PAIRS for snr in pair]) + + # Load the input signal. + input_signal = signal_processing.SignalProcessingUtils.LoadWav( + input_signal_filepath) + + noisy_mix_filepaths = {} + for impulse_response_name in self._IMPULSE_RESPONSES: + noise_track_filename = self._NOISE_TRACK_FILENAME_TEMPLATE.format( + impulse_response_name) + noise_track_filepath = os.path.join(test_data_cache_path, + noise_track_filename) + noise_signal = None + try: + # Load noise track. + noise_signal = signal_processing.SignalProcessingUtils.LoadWav( + noise_track_filepath) + except exceptions.FileNotFoundError: + # Generate noise track by applying the impulse response. + impulse_response_filepath = os.path.join( + self._aechen_ir_database_path, + self._IMPULSE_RESPONSES[impulse_response_name]) + noise_signal = self._GenerateNoiseTrack( + noise_track_filepath, input_signal, + impulse_response_filepath) + assert noise_signal is not None + + # Create the noisy mixes (once for each unique SNR value). + noisy_mix_filepaths[impulse_response_name] = {} + for snr in snr_values: + noisy_signal_filepath = os.path.join( + test_data_cache_path, + self._NOISY_SIGNAL_FILENAME_TEMPLATE.format( + impulse_response_name, snr)) + + # Create and save if not done. + if not os.path.exists(noisy_signal_filepath): + # Create noisy signal. + noisy_signal = signal_processing.SignalProcessingUtils.MixSignals( + input_signal, noise_signal, snr) + + # Save. + signal_processing.SignalProcessingUtils.SaveWav( + noisy_signal_filepath, noisy_signal) + + # Add file to the collection of mixes. + noisy_mix_filepaths[impulse_response_name][ + snr] = noisy_signal_filepath + + # Add all the noise-SNR pairs. 
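+        # Here the "noise" tracks are the reverberant copies of the input
+        # generated above, so e.g. the SNR pair [-3, 2] yields configs such
+        # as 'lecture_-3_2_SNR', mixing the dry input with its own echo.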
+ self._AddNoiseSnrPairs(base_output_path, noisy_mix_filepaths, + self._SNR_VALUE_PAIRS) + + def _GenerateNoiseTrack(self, noise_track_filepath, input_signal, + impulse_response_filepath): + """Generates noise track. + + Generate a signal by convolving input_signal with the impulse response in + impulse_response_filepath; then save to noise_track_filepath. + + Args: + noise_track_filepath: output file path for the noise track. + input_signal: (clean) input signal samples. + impulse_response_filepath: impulse response file path. + + Returns: + AudioSegment instance. + """ + # Load impulse response. + data = scipy.io.loadmat(impulse_response_filepath) + impulse_response = data['h_air'].flatten() + if self._MAX_IMPULSE_RESPONSE_LENGTH is not None: + logging.info('truncating impulse response from %d to %d samples', + len(impulse_response), + self._MAX_IMPULSE_RESPONSE_LENGTH) + impulse_response = impulse_response[:self. + _MAX_IMPULSE_RESPONSE_LENGTH] + + # Apply impulse response. + processed_signal = ( + signal_processing.SignalProcessingUtils.ApplyImpulseResponse( + input_signal, impulse_response)) + + # Save. + signal_processing.SignalProcessingUtils.SaveWav( + noise_track_filepath, processed_signal) + + return processed_signal diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py new file mode 100644 index 0000000000..948888e775 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py @@ -0,0 +1,71 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""TestDataGenerator factory class. +""" + +import logging + +from . import exceptions +from . import test_data_generation + + +class TestDataGeneratorFactory(object): + """Factory class used to create test data generators. + + Usage: Create a factory passing parameters to the ctor with which the + generators will be produced. + """ + + def __init__(self, aechen_ir_database_path, noise_tracks_path, + copy_with_identity): + """Ctor. + + Args: + aechen_ir_database_path: Path to the Aechen Impulse Response database. + noise_tracks_path: Path to the noise tracks to add. + copy_with_identity: Flag indicating whether the identity generator has to + make copies of the clean speech input files. + """ + self._output_directory_prefix = None + self._aechen_ir_database_path = aechen_ir_database_path + self._noise_tracks_path = noise_tracks_path + self._copy_with_identity = copy_with_identity + + def SetOutputDirectoryPrefix(self, prefix): + self._output_directory_prefix = prefix + + def GetInstance(self, test_data_generators_class): + """Creates an TestDataGenerator instance given a class object. + + Args: + test_data_generators_class: TestDataGenerator class object (not an + instance). + + Returns: + TestDataGenerator instance. 
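+
+        Raises:
+          InitializationException: if SetOutputDirectoryPrefix() has not been
+            called first.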
+ """ + if self._output_directory_prefix is None: + raise exceptions.InitializationException( + 'The output directory prefix for test data generators is not set' + ) + logging.debug('factory producing %s', test_data_generators_class) + + if test_data_generators_class == ( + test_data_generation.IdentityTestDataGenerator): + return test_data_generation.IdentityTestDataGenerator( + self._output_directory_prefix, self._copy_with_identity) + elif test_data_generators_class == ( + test_data_generation.ReverberationTestDataGenerator): + return test_data_generation.ReverberationTestDataGenerator( + self._output_directory_prefix, self._aechen_ir_database_path) + elif test_data_generators_class == ( + test_data_generation.AdditiveNoiseTestDataGenerator): + return test_data_generation.AdditiveNoiseTestDataGenerator( + self._output_directory_prefix, self._noise_tracks_path) + else: + return test_data_generators_class(self._output_directory_prefix) diff --git a/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py new file mode 100644 index 0000000000..f75098ae2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py @@ -0,0 +1,207 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +"""Unit tests for the test_data_generation module. +""" + +import os +import shutil +import tempfile +import unittest + +import numpy as np +import scipy.io + +from . import test_data_generation +from . import test_data_generation_factory +from . import signal_processing + + +class TestTestDataGenerators(unittest.TestCase): + """Unit tests for the test_data_generation module. + """ + + def setUp(self): + """Create temporary folders.""" + self._base_output_path = tempfile.mkdtemp() + self._test_data_cache_path = tempfile.mkdtemp() + self._fake_air_db_path = tempfile.mkdtemp() + + # Fake AIR DB impulse responses. + # TODO(alessiob): ReverberationTestDataGenerator will change to allow custom + # impulse responses. When changed, the coupling below between + # impulse_response_mat_file_names and + # ReverberationTestDataGenerator._IMPULSE_RESPONSES can be removed. + impulse_response_mat_file_names = [ + 'air_binaural_lecture_0_0_1.mat', + 'air_binaural_booth_0_0_1.mat', + ] + for impulse_response_mat_file_name in impulse_response_mat_file_names: + data = {'h_air': np.random.rand(1, 1000).astype(' +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "common_audio/wav_file.h" +#include "rtc_base/logging.h" + +ABSL_FLAG(std::string, i, "", "Input wav file"); +ABSL_FLAG(std::string, o, "", "VAD output file"); + +namespace webrtc { +namespace test { +namespace { + +// The allowed values are 10, 20 or 30 ms. 
+constexpr uint8_t kAudioFrameLengthMilliseconds = 30;
+constexpr int kMaxSampleRate = 48000;
+constexpr size_t kMaxFrameLen =
+    kAudioFrameLengthMilliseconds * kMaxSampleRate / 1000;
+
+constexpr uint8_t kBitmaskBuffSize = 8;
+
+int main(int argc, char* argv[]) {
+  absl::ParseCommandLine(argc, argv);
+  const std::string input_file = absl::GetFlag(FLAGS_i);
+  const std::string output_file = absl::GetFlag(FLAGS_o);
+  // Open wav input file and check properties.
+  WavReader wav_reader(input_file);
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() > kMaxSampleRate) {
+    RTC_LOG(LS_ERROR) << "Beyond maximum sample rate (" << kMaxSampleRate
+                      << ")";
+    return 1;
+  }
+  const size_t audio_frame_length = rtc::CheckedDivExact(
+      kAudioFrameLengthMilliseconds * wav_reader.sample_rate(), 1000);
+  if (audio_frame_length > kMaxFrameLen) {
+    RTC_LOG(LS_ERROR) << "The frame size and/or the sample rate are too large.";
+    return 1;
+  }
+
+  // Create output file and write header.
+  std::ofstream out_file(output_file, std::ofstream::binary);
+  const char audio_frame_length_ms = kAudioFrameLengthMilliseconds;
+  out_file.write(&audio_frame_length_ms, 1);  // Header.
+
+  // Run VAD and write decisions.
+  std::unique_ptr<Vad> vad = CreateVad(Vad::Aggressiveness::kVadNormal);
+  std::array<int16_t, kMaxFrameLen> samples;
+  char buff = 0;     // Buffer to write one bit per frame.
+  uint8_t next = 0;  // Points to the next bit to write in `buff`.
+  while (true) {
+    // Process frame.
+    const auto read_samples =
+        wav_reader.ReadSamples(audio_frame_length, samples.data());
+    if (read_samples < audio_frame_length)
+      break;
+    const auto is_speech = vad->VoiceActivity(
+        samples.data(), audio_frame_length, wav_reader.sample_rate());
+
+    // Write output.
+    buff = is_speech ? buff | (1 << next) : buff & ~(1 << next);
+    if (++next == kBitmaskBuffSize) {
+      out_file.write(&buff, 1);  // Flush.
+      buff = 0;  // Reset.
+      next = 0;
+    }
+  }
+
+  // Finalize.
+  char extra_bits = 0;
+  if (next > 0) {
+    extra_bits = kBitmaskBuffSize - next;
+    out_file.write(&buff, 1);  // Flush.
+  }
+  out_file.write(&extra_bits, 1);
+  out_file.close();
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.cc b/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.cc
new file mode 100644
index 0000000000..4899d2d459
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/runtime_setting_util.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+void ReplayRuntimeSetting(AudioProcessing* apm,
+                          const webrtc::audioproc::RuntimeSetting& setting) {
+  RTC_CHECK(apm);
+  // TODO(bugs.webrtc.org/9138): Add ability to handle different types
+  // of settings. Currently CapturePreGain, CaptureFixedPostGain and
+  // PlayoutVolumeChange are supported.
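+  // Note that the dispatch below also forwards PlayoutAudioDeviceChange and
+  // CaptureOutputUsed, even though the RTC_CHECK that follows does not
+  // accept them on their own.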
+ RTC_CHECK(setting.has_capture_pre_gain() || + setting.has_capture_fixed_post_gain() || + setting.has_playout_volume_change()); + + if (setting.has_capture_pre_gain()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreateCapturePreGain( + setting.capture_pre_gain())); + } else if (setting.has_capture_fixed_post_gain()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreateCaptureFixedPostGain( + setting.capture_fixed_post_gain())); + } else if (setting.has_playout_volume_change()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange( + setting.playout_volume_change())); + } else if (setting.has_playout_audio_device_change()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreatePlayoutAudioDeviceChange( + {setting.playout_audio_device_change().id(), + setting.playout_audio_device_change().max_volume()})); + } else if (setting.has_capture_output_used()) { + apm->SetRuntimeSetting( + AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting( + setting.capture_output_used())); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.h b/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.h new file mode 100644 index 0000000000..d8cbe82076 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/runtime_setting_util.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_RUNTIME_SETTING_UTIL_H_ +#define MODULES_AUDIO_PROCESSING_TEST_RUNTIME_SETTING_UTIL_H_ + +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/protobuf_utils.h" + +namespace webrtc { + +void ReplayRuntimeSetting(AudioProcessing* apm, + const webrtc::audioproc::RuntimeSetting& setting); +} + +#endif // MODULES_AUDIO_PROCESSING_TEST_RUNTIME_SETTING_UTIL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.cc b/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.cc new file mode 100644 index 0000000000..458f6ced76 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.cc @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/test/simulator_buffers.h" + +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +SimulatorBuffers::SimulatorBuffers(int render_input_sample_rate_hz, + int capture_input_sample_rate_hz, + int render_output_sample_rate_hz, + int capture_output_sample_rate_hz, + size_t num_render_input_channels, + size_t num_capture_input_channels, + size_t num_render_output_channels, + size_t num_capture_output_channels) { + Random rand_gen(42); + CreateConfigAndBuffer(render_input_sample_rate_hz, num_render_input_channels, + &rand_gen, &render_input_buffer, &render_input_config, + &render_input, &render_input_samples); + + CreateConfigAndBuffer(render_output_sample_rate_hz, + num_render_output_channels, &rand_gen, + &render_output_buffer, &render_output_config, + &render_output, &render_output_samples); + + CreateConfigAndBuffer(capture_input_sample_rate_hz, + num_capture_input_channels, &rand_gen, + &capture_input_buffer, &capture_input_config, + &capture_input, &capture_input_samples); + + CreateConfigAndBuffer(capture_output_sample_rate_hz, + num_capture_output_channels, &rand_gen, + &capture_output_buffer, &capture_output_config, + &capture_output, &capture_output_samples); + + UpdateInputBuffers(); +} + +SimulatorBuffers::~SimulatorBuffers() = default; + +void SimulatorBuffers::CreateConfigAndBuffer( + int sample_rate_hz, + size_t num_channels, + Random* rand_gen, + std::unique_ptr* buffer, + StreamConfig* config, + std::vector* buffer_data, + std::vector* buffer_data_samples) { + int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + *config = StreamConfig(sample_rate_hz, num_channels); + buffer->reset( + new AudioBuffer(config->sample_rate_hz(), config->num_channels(), + config->sample_rate_hz(), config->num_channels(), + config->sample_rate_hz(), config->num_channels())); + + buffer_data_samples->resize(samples_per_channel * num_channels); + for (auto& v : *buffer_data_samples) { + v = rand_gen->Rand(); + } + + buffer_data->resize(num_channels); + for (size_t ch = 0; ch < num_channels; ++ch) { + (*buffer_data)[ch] = &(*buffer_data_samples)[ch * samples_per_channel]; + } +} + +void SimulatorBuffers::UpdateInputBuffers() { + test::CopyVectorToAudioBuffer(capture_input_config, capture_input_samples, + capture_input_buffer.get()); + test::CopyVectorToAudioBuffer(render_input_config, render_input_samples, + render_input_buffer.get()); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.h b/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.h new file mode 100644 index 0000000000..36dcf301a2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/simulator_buffers.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_SIMULATOR_BUFFERS_H_ +#define MODULES_AUDIO_PROCESSING_TEST_SIMULATOR_BUFFERS_H_ + +#include +#include + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/random.h" + +namespace webrtc { +namespace test { + +struct SimulatorBuffers { + SimulatorBuffers(int render_input_sample_rate_hz, + int capture_input_sample_rate_hz, + int render_output_sample_rate_hz, + int capture_output_sample_rate_hz, + size_t num_render_input_channels, + size_t num_capture_input_channels, + size_t num_render_output_channels, + size_t num_capture_output_channels); + ~SimulatorBuffers(); + + void CreateConfigAndBuffer(int sample_rate_hz, + size_t num_channels, + Random* rand_gen, + std::unique_ptr* buffer, + StreamConfig* config, + std::vector* buffer_data, + std::vector* buffer_data_samples); + + void UpdateInputBuffers(); + + std::unique_ptr render_input_buffer; + std::unique_ptr capture_input_buffer; + std::unique_ptr render_output_buffer; + std::unique_ptr capture_output_buffer; + StreamConfig render_input_config; + StreamConfig capture_input_config; + StreamConfig render_output_config; + StreamConfig capture_output_config; + std::vector render_input; + std::vector render_input_samples; + std::vector capture_input; + std::vector capture_input_samples; + std::vector render_output; + std::vector render_output_samples; + std::vector capture_output; + std::vector capture_output_samples; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_SIMULATOR_BUFFERS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/test/test_utils.cc b/third_party/libwebrtc/modules/audio_processing/test/test_utils.cc new file mode 100644 index 0000000000..9aeebe5155 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/test_utils.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/test/test_utils.h" + +#include +#include + +#include "absl/strings/string_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/system/arch.h" + +namespace webrtc { + +ChannelBufferWavReader::ChannelBufferWavReader(std::unique_ptr file) + : file_(std::move(file)) {} + +ChannelBufferWavReader::~ChannelBufferWavReader() = default; + +bool ChannelBufferWavReader::Read(ChannelBuffer* buffer) { + RTC_CHECK_EQ(file_->num_channels(), buffer->num_channels()); + interleaved_.resize(buffer->size()); + if (file_->ReadSamples(interleaved_.size(), &interleaved_[0]) != + interleaved_.size()) { + return false; + } + + FloatS16ToFloat(&interleaved_[0], interleaved_.size(), &interleaved_[0]); + Deinterleave(&interleaved_[0], buffer->num_frames(), buffer->num_channels(), + buffer->channels()); + return true; +} + +ChannelBufferWavWriter::ChannelBufferWavWriter(std::unique_ptr file) + : file_(std::move(file)) {} + +ChannelBufferWavWriter::~ChannelBufferWavWriter() = default; + +void ChannelBufferWavWriter::Write(const ChannelBuffer& buffer) { + RTC_CHECK_EQ(file_->num_channels(), buffer.num_channels()); + interleaved_.resize(buffer.size()); + Interleave(buffer.channels(), buffer.num_frames(), buffer.num_channels(), + &interleaved_[0]); + FloatToFloatS16(&interleaved_[0], interleaved_.size(), &interleaved_[0]); + file_->WriteSamples(&interleaved_[0], interleaved_.size()); +} + +ChannelBufferVectorWriter::ChannelBufferVectorWriter(std::vector* output) + : output_(output) { + RTC_DCHECK(output_); +} + +ChannelBufferVectorWriter::~ChannelBufferVectorWriter() = default; + +void ChannelBufferVectorWriter::Write(const ChannelBuffer& buffer) { + // Account for sample rate changes throughout a simulation. + interleaved_buffer_.resize(buffer.size()); + Interleave(buffer.channels(), buffer.num_frames(), buffer.num_channels(), + interleaved_buffer_.data()); + size_t old_size = output_->size(); + output_->resize(old_size + interleaved_buffer_.size()); + FloatToFloatS16(interleaved_buffer_.data(), interleaved_buffer_.size(), + output_->data() + old_size); +} + +FILE* OpenFile(absl::string_view filename, absl::string_view mode) { + std::string filename_str(filename); + FILE* file = fopen(filename_str.c_str(), std::string(mode).c_str()); + if (!file) { + printf("Unable to open file %s\n", filename_str.c_str()); + exit(1); + } + return file; +} + +void SetFrameSampleRate(Int16FrameData* frame, int sample_rate_hz) { + frame->sample_rate_hz = sample_rate_hz; + frame->samples_per_channel = + AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/test_utils.h b/third_party/libwebrtc/modules/audio_processing/test/test_utils.h new file mode 100644 index 0000000000..bf82f9d66d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/test_utils.h @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_
+
+#include <math.h>
+
+#include <algorithm>
+#include <iterator>
+#include <memory>
+#include <sstream>  // no-presubmit-check TODO(webrtc:8982)
+#include <string>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "common_audio/channel_buffer.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+static const AudioProcessing::Error kNoErr = AudioProcessing::kNoError;
+#define EXPECT_NOERR(expr) EXPECT_EQ(kNoErr, (expr))
+
+// Encapsulates samples and metadata for an integer frame.
+struct Int16FrameData {
+  // Max data size that matches the data size of the AudioFrame class,
+  // providing storage for 8 channels of 96 kHz data.
+  static const int kMaxDataSizeSamples = 7680;
+
+  Int16FrameData() {
+    sample_rate_hz = 0;
+    num_channels = 0;
+    samples_per_channel = 0;
+    data.fill(0);
+  }
+
+  void CopyFrom(const Int16FrameData& src) {
+    samples_per_channel = src.samples_per_channel;
+    sample_rate_hz = src.sample_rate_hz;
+    num_channels = src.num_channels;
+
+    const size_t length = samples_per_channel * num_channels;
+    RTC_CHECK_LE(length, kMaxDataSizeSamples);
+    memcpy(data.data(), src.data.data(), sizeof(int16_t) * length);
+  }
+  std::array<int16_t, kMaxDataSizeSamples> data;
+  int32_t sample_rate_hz;
+  size_t num_channels;
+  size_t samples_per_channel;
+};
+
+// Reads ChannelBuffers from a provided WavReader.
+class ChannelBufferWavReader final {
+ public:
+  explicit ChannelBufferWavReader(std::unique_ptr<WavReader> file);
+  ~ChannelBufferWavReader();
+
+  ChannelBufferWavReader(const ChannelBufferWavReader&) = delete;
+  ChannelBufferWavReader& operator=(const ChannelBufferWavReader&) = delete;
+
+  // Reads data from the file according to the `buffer` format. Returns false
+  // if a full buffer can't be read from the file.
+  bool Read(ChannelBuffer<float>* buffer);
+
+ private:
+  std::unique_ptr<WavReader> file_;
+  std::vector<float> interleaved_;
+};
+
+// Writes ChannelBuffers to a provided WavWriter.
+class ChannelBufferWavWriter final {
+ public:
+  explicit ChannelBufferWavWriter(std::unique_ptr<WavWriter> file);
+  ~ChannelBufferWavWriter();
+
+  ChannelBufferWavWriter(const ChannelBufferWavWriter&) = delete;
+  ChannelBufferWavWriter& operator=(const ChannelBufferWavWriter&) = delete;
+
+  void Write(const ChannelBuffer<float>& buffer);
+
+ private:
+  std::unique_ptr<WavWriter> file_;
+  std::vector<float> interleaved_;
+};
+
+// Takes a pointer to a vector. Allows appending the samples of channel
+// buffers to the given vector, by interleaving the samples and converting
+// them to float S16.
+class ChannelBufferVectorWriter final {
+ public:
+  explicit ChannelBufferVectorWriter(std::vector<float>* output);
+  ChannelBufferVectorWriter(const ChannelBufferVectorWriter&) = delete;
+  ChannelBufferVectorWriter& operator=(const ChannelBufferVectorWriter&) =
+      delete;
+  ~ChannelBufferVectorWriter();
+
+  // Creates an interleaved copy of `buffer`, converts the samples to float
+  // S16 and appends the result to output_.
+  void Write(const ChannelBuffer<float>& buffer);
+
+ private:
+  std::vector<float> interleaved_buffer_;
+  std::vector<float>* output_;
+};
+
+// Exits on failure; do not use in unit tests.
+FILE* OpenFile(absl::string_view filename, absl::string_view mode);
+
+void SetFrameSampleRate(Int16FrameData* frame, int sample_rate_hz);
+
+template <typename T>
+void SetContainerFormat(int sample_rate_hz,
+                        size_t num_channels,
+                        Int16FrameData* frame,
+                        std::unique_ptr<ChannelBuffer<T> >* cb) {
+  SetFrameSampleRate(frame, sample_rate_hz);
+  frame->num_channels = num_channels;
+  cb->reset(new ChannelBuffer<T>(frame->samples_per_channel, num_channels));
+}
+
+template <typename T>
+float ComputeSNR(const T* ref, const T* test, size_t length, float* variance) {
+  float mse = 0;
+  float mean = 0;
+  *variance = 0;
+  for (size_t i = 0; i < length; ++i) {
+    T error = ref[i] - test[i];
+    mse += error * error;
+    *variance += ref[i] * ref[i];
+    mean += ref[i];
+  }
+  mse /= length;
+  *variance /= length;
+  mean /= length;
+  *variance -= mean * mean;
+
+  float snr = 100;  // We assign 100 dB to the zero-error case.
+  if (mse > 0)
+    snr = 10 * log10(*variance / mse);
+  return snr;
+}
+
+// Returns a vector parsed from whitespace delimited values in to_parse,
+// or an empty vector if the string could not be parsed.
+template <typename T>
+std::vector<T> ParseList(absl::string_view to_parse) {
+  std::vector<T> values;
+
+  std::istringstream str(  // no-presubmit-check TODO(webrtc:8982)
+      std::string{to_parse});
+  std::copy(
+      std::istream_iterator<T>(str),  // no-presubmit-check TODO(webrtc:8982)
+      std::istream_iterator<T>(),     // no-presubmit-check TODO(webrtc:8982)
+      std::back_inserter(values));
+
+  return values;
+}
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/test/unittest.proto b/third_party/libwebrtc/modules/audio_processing/test/unittest.proto
new file mode 100644
index 0000000000..07d1cda6c8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/unittest.proto
@@ -0,0 +1,48 @@
+syntax = "proto2";
+option optimize_for = LITE_RUNTIME;
+package webrtc.audioproc;
+
+message Test {
+  optional int32 num_reverse_channels = 1;
+  optional int32 num_input_channels = 2;
+  optional int32 num_output_channels = 3;
+  optional int32 sample_rate = 4;
+
+  message Frame {
+  }
+
+  repeated Frame frame = 5;
+
+  optional int32 analog_level_average = 6;
+  optional int32 max_output_average = 7;
+  optional int32 has_voice_count = 9;
+  optional int32 is_saturated_count = 10;
+
+  message EchoMetrics {
+    optional float echo_return_loss = 1;
+    optional float echo_return_loss_enhancement = 2;
+    optional float divergent_filter_fraction = 3;
+    optional float residual_echo_likelihood = 4;
+    optional float residual_echo_likelihood_recent_max = 5;
+  }
+
+  repeated EchoMetrics echo_metrics = 11;
+
+  message DelayMetrics {
+    optional int32 median = 1;
+    optional int32 std = 2;
+  }
+
+  repeated DelayMetrics delay_metrics = 12;
+
+  optional float rms_dbfs_average = 13;
+
+  optional float ns_speech_probability_average = 14;
+
+  optional bool use_aec_extended_filter = 15;
+}
+
+message OutputData {
+  repeated Test test = 1;
+}
+
diff --git a/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.cc b/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.cc
new file mode 100644
index 0000000000..ee87f9e1a8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.cc
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree.
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/wav_based_simulator.h" + +#include + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/checks.h" +#include "rtc_base/system/file_wrapper.h" + +namespace webrtc { +namespace test { + +std::vector +WavBasedSimulator::GetCustomEventChain(absl::string_view filename) { + std::vector call_chain; + FileWrapper file_wrapper = FileWrapper::OpenReadOnly(filename); + + RTC_CHECK(file_wrapper.is_open()) + << "Could not open the custom call order file, reverting " + "to using the default call order"; + + char c; + size_t num_read = file_wrapper.Read(&c, sizeof(char)); + while (num_read > 0) { + switch (c) { + case 'r': + call_chain.push_back(SimulationEventType::kProcessReverseStream); + break; + case 'c': + call_chain.push_back(SimulationEventType::kProcessStream); + break; + case '\n': + break; + default: + RTC_FATAL() << "Incorrect custom call order file"; + } + + num_read = file_wrapper.Read(&c, sizeof(char)); + } + + return call_chain; +} + +WavBasedSimulator::WavBasedSimulator( + const SimulationSettings& settings, + rtc::scoped_refptr audio_processing, + std::unique_ptr ap_builder) + : AudioProcessingSimulator(settings, + std::move(audio_processing), + std::move(ap_builder)) { + if (settings_.call_order_input_filename) { + call_chain_ = WavBasedSimulator::GetCustomEventChain( + *settings_.call_order_input_filename); + } else { + call_chain_ = WavBasedSimulator::GetDefaultEventChain(); + } +} + +WavBasedSimulator::~WavBasedSimulator() = default; + +std::vector +WavBasedSimulator::GetDefaultEventChain() { + std::vector call_chain(2); + call_chain[0] = SimulationEventType::kProcessStream; + call_chain[1] = SimulationEventType::kProcessReverseStream; + return call_chain; +} + +void WavBasedSimulator::PrepareProcessStreamCall() { + if (settings_.fixed_interface) { + fwd_frame_.CopyFrom(*in_buf_); + } + ap_->set_stream_key_pressed(settings_.override_key_pressed.value_or(false)); + + if (!settings_.use_stream_delay || *settings_.use_stream_delay) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->set_stream_delay_ms( + settings_.stream_delay ? 
*settings_.stream_delay : 0)); + } +} + +void WavBasedSimulator::PrepareReverseProcessStreamCall() { + if (settings_.fixed_interface) { + rev_frame_.CopyFrom(*reverse_in_buf_); + } +} + +void WavBasedSimulator::Process() { + ConfigureAudioProcessor(); + + Initialize(); + + bool samples_left_to_process = true; + int call_chain_index = 0; + int capture_frames_since_init = 0; + constexpr int kInitIndex = 1; + while (samples_left_to_process) { + switch (call_chain_[call_chain_index]) { + case SimulationEventType::kProcessStream: + SelectivelyToggleDataDumping(kInitIndex, capture_frames_since_init); + + samples_left_to_process = HandleProcessStreamCall(); + ++capture_frames_since_init; + break; + case SimulationEventType::kProcessReverseStream: + if (settings_.reverse_input_filename) { + samples_left_to_process = HandleProcessReverseStreamCall(); + } + break; + default: + RTC_CHECK_NOTREACHED(); + } + + call_chain_index = (call_chain_index + 1) % call_chain_.size(); + } + + DetachAecDump(); +} + +void WavBasedSimulator::Analyze() { + std::cout << "Inits:" << std::endl; + std::cout << "1: -->" << std::endl; + std::cout << " Time:" << std::endl; + std::cout << " Capture: 0 s (0 frames) " << std::endl; + std::cout << " Render: 0 s (0 frames)" << std::endl; +} + +bool WavBasedSimulator::HandleProcessStreamCall() { + bool samples_left_to_process = buffer_reader_->Read(in_buf_.get()); + if (samples_left_to_process) { + PrepareProcessStreamCall(); + ProcessStream(settings_.fixed_interface); + } + return samples_left_to_process; +} + +bool WavBasedSimulator::HandleProcessReverseStreamCall() { + bool samples_left_to_process = + reverse_buffer_reader_->Read(reverse_in_buf_.get()); + if (samples_left_to_process) { + PrepareReverseProcessStreamCall(); + ProcessReverseStream(settings_.fixed_interface); + } + return samples_left_to_process; +} + +void WavBasedSimulator::Initialize() { + std::unique_ptr in_file( + new WavReader(settings_.input_filename->c_str())); + int input_sample_rate_hz = in_file->sample_rate(); + int input_num_channels = in_file->num_channels(); + buffer_reader_.reset(new ChannelBufferWavReader(std::move(in_file))); + + int output_sample_rate_hz = settings_.output_sample_rate_hz + ? *settings_.output_sample_rate_hz + : input_sample_rate_hz; + int output_num_channels = settings_.output_num_channels + ? *settings_.output_num_channels + : input_num_channels; + + int reverse_sample_rate_hz = 48000; + int reverse_num_channels = 1; + int reverse_output_sample_rate_hz = 48000; + int reverse_output_num_channels = 1; + if (settings_.reverse_input_filename) { + std::unique_ptr reverse_in_file( + new WavReader(settings_.reverse_input_filename->c_str())); + reverse_sample_rate_hz = reverse_in_file->sample_rate(); + reverse_num_channels = reverse_in_file->num_channels(); + reverse_buffer_reader_.reset( + new ChannelBufferWavReader(std::move(reverse_in_file))); + + reverse_output_sample_rate_hz = + settings_.reverse_output_sample_rate_hz + ? *settings_.reverse_output_sample_rate_hz + : reverse_sample_rate_hz; + reverse_output_num_channels = settings_.reverse_output_num_channels + ? 
*settings_.reverse_output_num_channels + : reverse_num_channels; + } + + SetupBuffersConfigsOutputs( + input_sample_rate_hz, output_sample_rate_hz, reverse_sample_rate_hz, + reverse_output_sample_rate_hz, input_num_channels, output_num_channels, + reverse_num_channels, reverse_output_num_channels); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.h b/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.h new file mode 100644 index 0000000000..44e9ee2b7f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/test/wav_based_simulator.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_WAV_BASED_SIMULATOR_H_ +#define MODULES_AUDIO_PROCESSING_TEST_WAV_BASED_SIMULATOR_H_ + +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/test/audio_processing_simulator.h" + +namespace webrtc { +namespace test { + +// Used to perform an audio processing simulation from wav files. +class WavBasedSimulator final : public AudioProcessingSimulator { + public: + WavBasedSimulator(const SimulationSettings& settings, + rtc::scoped_refptr audio_processing, + std::unique_ptr ap_builder); + + WavBasedSimulator() = delete; + WavBasedSimulator(const WavBasedSimulator&) = delete; + WavBasedSimulator& operator=(const WavBasedSimulator&) = delete; + + ~WavBasedSimulator() override; + + // Processes the WAV input. + void Process() override; + + // Only analyzes the data for the simulation, instead of perform any + // processing. + void Analyze() override; + + private: + enum SimulationEventType { + kProcessStream, + kProcessReverseStream, + }; + + void Initialize(); + bool HandleProcessStreamCall(); + bool HandleProcessReverseStreamCall(); + void PrepareProcessStreamCall(); + void PrepareReverseProcessStreamCall(); + static std::vector GetDefaultEventChain(); + static std::vector GetCustomEventChain( + absl::string_view filename); + + std::vector call_chain_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_WAV_BASED_SIMULATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc b/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc new file mode 100644 index 0000000000..bd1c50477a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.cc @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// An implementation of a 3-band FIR filter-bank with DCT modulation, similar to +// the proposed in "Multirate Signal Processing for Communication Systems" by +// Fredric J Harris. 
+// +// The idea is to take a heterodyne system and change the order of the +// components to get something which is efficient to implement digitally. +// +// It is possible to separate the filter using the noble identity as follows: +// +// H(z) = H0(z^3) + z^-1 * H1(z^3) + z^-2 * H2(z^3) +// +// This is used in the analysis stage to first downsample serial to parallel +// and then filter each branch with one of these polyphase decompositions of the +// lowpass prototype. Because each filter is only a modulation of the prototype, +// it is enough to multiply each coefficient by the respective cosine value to +// shift it to the desired band. But because the cosine period is 12 samples, +// it requires separating the prototype even further using the noble identity. +// After filtering and modulating for each band, the output of all filters is +// accumulated to get the downsampled bands. +// +// A similar logic can be applied to the synthesis stage. + +#include "modules/audio_processing/three_band_filter_bank.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Factors to take into account when choosing `kFilterSize`: +// 1. Higher `kFilterSize`, means faster transition, which ensures less +// aliasing. This is especially important when there is non-linear +// processing between the splitting and merging. +// 2. The delay that this filter bank introduces is +// `kNumBands` * `kSparsity` * `kFilterSize` / 2, so it increases linearly +// with `kFilterSize`. +// 3. The computation complexity also increases linearly with `kFilterSize`. + +// The Matlab code to generate these `kFilterCoeffs` is: +// +// N = kNumBands * kSparsity * kFilterSize - 1; +// h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5)); +// reshape(h, kNumBands * kSparsity, kFilterSize); +// +// The code below uses the values of kFilterSize, kNumBands and kSparsity +// specified in the header. + +// Because the total bandwidth of the lower and higher band is double the middle +// one (because of the spectrum parity), the low-pass prototype is half the +// bandwidth of 1 / (2 * `kNumBands`) and is then shifted with cosine modulation +// to the right places. +// A Kaiser window is used because of its flexibility and the alpha is set to +// 3.5, since that sets a stop band attenuation of 40dB ensuring a fast +// transition. 
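+//
+// Plugging the constants from three_band_filter_bank.h into the delay
+// formula above gives kNumBands * kSparsity * kFilterSize / 2 =
+// 3 * 4 * 4 / 2 = 24 samples, i.e. 0.5 ms at 48 kHz, which matches the
+// delay quoted in the header.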
+ +constexpr int kSubSampling = ThreeBandFilterBank::kNumBands; +constexpr int kDctSize = ThreeBandFilterBank::kNumBands; +static_assert(ThreeBandFilterBank::kNumBands * + ThreeBandFilterBank::kSplitBandSize == + ThreeBandFilterBank::kFullBandSize, + "The full band must be split in equally sized subbands"); + +const float + kFilterCoeffs[ThreeBandFilterBank::kNumNonZeroFilters][kFilterSize] = { + {-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f}, + {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f}, + {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f}, + {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f}, + {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f}, + {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f}, + {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f}, + {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f}, + {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f}, + {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}}; + +constexpr int kZeroFilterIndex1 = 3; +constexpr int kZeroFilterIndex2 = 9; + +const float kDctModulation[ThreeBandFilterBank::kNumNonZeroFilters][kDctSize] = + {{2.f, 2.f, 2.f}, + {1.73205077f, 0.f, -1.73205077f}, + {1.f, -2.f, 1.f}, + {-1.f, 2.f, -1.f}, + {-1.73205077f, 0.f, 1.73205077f}, + {-2.f, -2.f, -2.f}, + {-1.73205077f, 0.f, 1.73205077f}, + {-1.f, 2.f, -1.f}, + {1.f, -2.f, 1.f}, + {1.73205077f, 0.f, -1.73205077f}}; + +// Filters the input signal `in` with the filter `filter` using a shift by +// `in_shift`, taking into account the previous state. +void FilterCore( + rtc::ArrayView filter, + rtc::ArrayView in, + const int in_shift, + rtc::ArrayView out, + rtc::ArrayView state) { + constexpr int kMaxInShift = (kStride - 1); + RTC_DCHECK_GE(in_shift, 0); + RTC_DCHECK_LE(in_shift, kMaxInShift); + std::fill(out.begin(), out.end(), 0.f); + + for (int k = 0; k < in_shift; ++k) { + for (int i = 0, j = kMemorySize + k - in_shift; i < kFilterSize; + ++i, j -= kStride) { + out[k] += state[j] * filter[i]; + } + } + + for (int k = in_shift, shift = 0; k < kFilterSize * kStride; ++k, ++shift) { + RTC_DCHECK_GE(shift, 0); + const int loop_limit = std::min(kFilterSize, 1 + (shift >> kStrideLog2)); + for (int i = 0, j = shift; i < loop_limit; ++i, j -= kStride) { + out[k] += in[j] * filter[i]; + } + for (int i = loop_limit, j = kMemorySize + shift - loop_limit * kStride; + i < kFilterSize; ++i, j -= kStride) { + out[k] += state[j] * filter[i]; + } + } + + for (int k = kFilterSize * kStride, shift = kFilterSize * kStride - in_shift; + k < ThreeBandFilterBank::kSplitBandSize; ++k, ++shift) { + for (int i = 0, j = shift; i < kFilterSize; ++i, j -= kStride) { + out[k] += in[j] * filter[i]; + } + } + + // Update current state. + std::copy(in.begin() + ThreeBandFilterBank::kSplitBandSize - kMemorySize, + in.end(), state.begin()); +} + +} // namespace + +// Because the low-pass filter prototype has half bandwidth it is possible to +// use a DCT to shift it in both directions at the same time, to the center +// frequencies [1 / 12, 3 / 12, 5 / 12]. 
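+// The kDctModulation table above appears to follow
+// 2 * cos(pi * (2 * band + 1) * f / 6) for filter index f = 0..11; f = 3 and
+// f = 9 make this zero for every band, which is why those two filters are
+// skipped (kZeroFilterIndex1 and kZeroFilterIndex2) and only
+// kNumNonZeroFilters rows are stored.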
+ThreeBandFilterBank::ThreeBandFilterBank() { + RTC_DCHECK_EQ(state_analysis_.size(), kNumNonZeroFilters); + RTC_DCHECK_EQ(state_synthesis_.size(), kNumNonZeroFilters); + for (int k = 0; k < kNumNonZeroFilters; ++k) { + RTC_DCHECK_EQ(state_analysis_[k].size(), kMemorySize); + RTC_DCHECK_EQ(state_synthesis_[k].size(), kMemorySize); + + state_analysis_[k].fill(0.f); + state_synthesis_[k].fill(0.f); + } +} + +ThreeBandFilterBank::~ThreeBandFilterBank() = default; + +// The analysis can be separated in these steps: +// 1. Serial to parallel downsampling by a factor of `kNumBands`. +// 2. Filtering of `kSparsity` different delayed signals with polyphase +// decomposition of the low-pass prototype filter and upsampled by a factor +// of `kSparsity`. +// 3. Modulating with cosines and accumulating to get the desired band. +void ThreeBandFilterBank::Analysis( + rtc::ArrayView in, + rtc::ArrayView, ThreeBandFilterBank::kNumBands> + out) { + // Initialize the output to zero. + for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) { + RTC_DCHECK_EQ(out[band].size(), kSplitBandSize); + std::fill(out[band].begin(), out[band].end(), 0); + } + + for (int downsampling_index = 0; downsampling_index < kSubSampling; + ++downsampling_index) { + // Downsample to form the filter input. + std::array in_subsampled; + for (int k = 0; k < kSplitBandSize; ++k) { + in_subsampled[k] = + in[(kSubSampling - 1) - downsampling_index + kSubSampling * k]; + } + + for (int in_shift = 0; in_shift < kStride; ++in_shift) { + // Choose filter, skip zero filters. + const int index = downsampling_index + in_shift * kSubSampling; + if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) { + continue; + } + const int filter_index = + index < kZeroFilterIndex1 + ? index + : (index < kZeroFilterIndex2 ? index - 1 : index - 2); + + rtc::ArrayView filter( + kFilterCoeffs[filter_index]); + rtc::ArrayView dct_modulation( + kDctModulation[filter_index]); + rtc::ArrayView state(state_analysis_[filter_index]); + + // Filter. + std::array out_subsampled; + FilterCore(filter, in_subsampled, in_shift, out_subsampled, state); + + // Band and modulate the output. + for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) { + float* out_band = out[band].data(); + for (int n = 0; n < kSplitBandSize; ++n) { + out_band[n] += dct_modulation[band] * out_subsampled[n]; + } + } + } + } +} + +// The synthesis can be separated in these steps: +// 1. Modulating with cosines. +// 2. Filtering each one with a polyphase decomposition of the low-pass +// prototype filter upsampled by a factor of `kSparsity` and accumulating +// `kSparsity` signals with different delays. +// 3. Parallel to serial upsampling by a factor of `kNumBands`. +void ThreeBandFilterBank::Synthesis( + rtc::ArrayView, ThreeBandFilterBank::kNumBands> + in, + rtc::ArrayView out) { + std::fill(out.begin(), out.end(), 0); + for (int upsampling_index = 0; upsampling_index < kSubSampling; + ++upsampling_index) { + for (int in_shift = 0; in_shift < kStride; ++in_shift) { + // Choose filter, skip zero filters. + const int index = upsampling_index + in_shift * kSubSampling; + if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) { + continue; + } + const int filter_index = + index < kZeroFilterIndex1 + ? index + : (index < kZeroFilterIndex2 ? 
+
+      rtc::ArrayView<const float, kFilterSize> filter(
+          kFilterCoeffs[filter_index]);
+      rtc::ArrayView<const float, kDctSize> dct_modulation(
+          kDctModulation[filter_index]);
+      rtc::ArrayView<float, kMemorySize> state(state_synthesis_[filter_index]);
+
+      // Prepare filter input by modulating the banded input.
+      std::array<float, kSplitBandSize> in_subsampled;
+      std::fill(in_subsampled.begin(), in_subsampled.end(), 0.f);
+      for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) {
+        RTC_DCHECK_EQ(in[band].size(), kSplitBandSize);
+        const float* in_band = in[band].data();
+        for (int n = 0; n < kSplitBandSize; ++n) {
+          in_subsampled[n] += dct_modulation[band] * in_band[n];
+        }
+      }
+
+      // Filter.
+      std::array<float, kSplitBandSize> out_subsampled;
+      FilterCore(filter, in_subsampled, in_shift, out_subsampled, state);
+
+      // Upsample.
+      constexpr float kUpsamplingScaling = kSubSampling;
+      for (int k = 0; k < kSplitBandSize; ++k) {
+        out[upsampling_index + kSubSampling * k] +=
+            kUpsamplingScaling * out_subsampled[k];
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.h b/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.h
new file mode 100644
index 0000000000..db66caba4a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/three_band_filter_bank.h
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
+#define MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
+
+#include <array>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+constexpr int kSparsity = 4;
+constexpr int kStrideLog2 = 2;
+constexpr int kStride = 1 << kStrideLog2;
+constexpr int kNumZeroFilters = 2;
+constexpr int kFilterSize = 4;
+constexpr int kMemorySize = kFilterSize * kStride - 1;
+static_assert(kMemorySize == 15,
+              "The memory size must be sufficient to provide memory for the "
+              "shifted filters");
+
+// An implementation of a 3-band FIR filter-bank with DCT modulation, similar
+// to the one proposed in "Multirate Signal Processing for Communication
+// Systems" by Fredric J Harris.
+// The low-pass filter prototype has these characteristics:
+// * Pass-band ripple = 0.3dB
+// * Pass-band frequency = 0.147 (7kHz at 48kHz)
+// * Stop-band attenuation = 40dB
+// * Stop-band frequency = 0.192 (9.2kHz at 48kHz)
+// * Delay = 24 samples (500us at 48kHz)
+// * Linear phase
+// This filter bank does not satisfy perfect reconstruction. The SNR after
+// analysis and synthesis (with no processing in between) is approximately
+// 9.5dB, depending on the input signal, after compensating for the delay.
+class ThreeBandFilterBank final {
+ public:
+  static const int kNumBands = 3;
+  static const int kFullBandSize = 480;
+  static const int kSplitBandSize =
+      ThreeBandFilterBank::kFullBandSize / ThreeBandFilterBank::kNumBands;
+  static const int kNumNonZeroFilters =
+      kSparsity * ThreeBandFilterBank::kNumBands - kNumZeroFilters;
+
+  ThreeBandFilterBank();
+  ~ThreeBandFilterBank();
+
+  // Splits `in` of size kFullBandSize into 3 downsampled frequency bands in
+  // `out`, each of size 160.
+  void Analysis(rtc::ArrayView<const float, kFullBandSize> in,
+                rtc::ArrayView<const rtc::ArrayView<float>, kNumBands> out);
+
+  // Merges the 3 downsampled frequency bands in `in`, each of size 160, into
+  // `out`, which is of size kFullBandSize.
+  void Synthesis(rtc::ArrayView<const rtc::ArrayView<float>, kNumBands> in,
+                 rtc::ArrayView<float, kFullBandSize> out);
+
+ private:
+  std::array<std::array<float, kMemorySize>, kNumNonZeroFilters>
+      state_analysis_;
+  std::array<std::array<float, kMemorySize>, kNumNonZeroFilters>
+      state_synthesis_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/transient/BUILD.gn
new file mode 100644
index 0000000000..41aeab0abe
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/BUILD.gn
@@ -0,0 +1,133 @@
+# Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+
+rtc_source_set("transient_suppressor_api") {
+  sources = [ "transient_suppressor.h" ]
+}
+
+rtc_library("transient_suppressor_impl") {
+  visibility = [
+    ":click_annotate",
+    ":transient_suppression_test",
+    ":transient_suppression_unittests",
+    "..:optionally_built_submodule_creators",
+  ]
+  sources = [
+    "common.h",
+    "daubechies_8_wavelet_coeffs.h",
+    "dyadic_decimator.h",
+    "moving_moments.cc",
+    "moving_moments.h",
+    "transient_detector.cc",
+    "transient_detector.h",
+    "transient_suppressor_impl.cc",
+    "transient_suppressor_impl.h",
+    "windows_private.h",
+    "wpd_node.cc",
+    "wpd_node.h",
+    "wpd_tree.cc",
+    "wpd_tree.h",
+  ]
+  deps = [
+    ":transient_suppressor_api",
+    ":voice_probability_delay_unit",
+    "../../../common_audio:common_audio",
+    "../../../common_audio:common_audio_c",
+    "../../../common_audio:fir_filter",
+    "../../../common_audio:fir_filter_factory",
+    "../../../common_audio/third_party/ooura:fft_size_256",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:gtest_prod",
+    "../../../rtc_base:logging",
+  ]
+}
+
+rtc_library("voice_probability_delay_unit") {
+  sources = [
+    "voice_probability_delay_unit.cc",
+    "voice_probability_delay_unit.h",
+  ]
+  deps = [ "../../../rtc_base:checks" ]
+}
+
+if (rtc_include_tests) {
+  if (!build_with_chromium) {
+    rtc_executable("click_annotate") {
+      testonly = true
+      sources = [
+        "click_annotate.cc",
+        "file_utils.cc",
+        "file_utils.h",
+      ]
+      deps = [
+        ":transient_suppressor_impl",
+        "..:audio_processing",
+        "../../../rtc_base/system:file_wrapper",
+        "../../../system_wrappers",
+      ]
+    }
+
+    rtc_executable("transient_suppression_test") {
+      testonly = true
+      sources = [
+        "file_utils.cc",
+        "file_utils.h",
+        "transient_suppression_test.cc",
+        "voice_probability_delay_unit_unittest.cc",
+      ]
+      deps = [
+        ":transient_suppressor_api",
+        ":transient_suppressor_impl",
+        ":voice_probability_delay_unit",
+        "..:audio_processing",
+        "../../../common_audio",
+        "../../../rtc_base/system:file_wrapper",
+        "../../../system_wrappers",
+        "../../../test:fileutils",
+        "../../../test:test_support",
+        "../agc:level_estimation",
+        "//testing/gtest",
+        "//third_party/abseil-cpp/absl/flags:flag",
+        "//third_party/abseil-cpp/absl/flags:parse",
+        "//third_party/abseil-cpp/absl/types:optional",
+      ]
+    }
+  }
+
+  rtc_library("transient_suppression_unittests") {
+    testonly = true
+    sources = [
+      "dyadic_decimator_unittest.cc",
+      "file_utils.cc",
+      "file_utils.h",
+      "file_utils_unittest.cc",
+      "moving_moments_unittest.cc",
+      "transient_detector_unittest.cc",
+      "transient_suppressor_unittest.cc",
+      "voice_probability_delay_unit_unittest.cc",
+      "wpd_node_unittest.cc",
+      "wpd_tree_unittest.cc",
+    ]
+    deps = [
+      ":transient_suppressor_api",
+      ":transient_suppressor_impl",
+      ":voice_probability_delay_unit",
+      "../../../rtc_base:stringutils",
+      "../../../rtc_base/system:file_wrapper",
+      "../../../test:fileutils",
+      "../../../test:test_support",
+      "//testing/gtest",
+    ]
+    absl_deps = [
+      "//third_party/abseil-cpp/absl/strings",
+      "//third_party/abseil-cpp/absl/types:optional",
+    ]
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/click_annotate.cc b/third_party/libwebrtc/modules/audio_processing/transient/click_annotate.cc
new file mode 100644
index 0000000000..f3f040f9aa
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/click_annotate.cc
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cfloat>
+#include <cstdio>
+#include <cstdlib>
+#include <memory>
+#include <vector>
+
+#include "modules/audio_processing/transient/file_utils.h"
+#include "modules/audio_processing/transient/transient_detector.h"
+#include "rtc_base/system/file_wrapper.h"
+
+using webrtc::FileWrapper;
+using webrtc::TransientDetector;
+
+// Application to generate an RTP timing file.
+// Opens the PCM file and divides the signal into frames.
+// Creates a send times array, one for each step.
+// Each block that contains a transient has an infinite send time.
+// The resultant array is written to a DAT file.
+// Returns -1 on error or `lost_packets` otherwise.
+int main(int argc, char* argv[]) {
+  if (argc != 5) {
+    printf("\n%s - Application to generate an RTP timing file.\n\n", argv[0]);
+    printf("%s PCMfile DATfile chunkSize sampleRate\n\n", argv[0]);
+    printf("Opens the PCMfile with sampleRate in Hertz.\n");
+    printf("Creates a send times array, one for each chunkSize ");
+    printf("milliseconds step.\n");
+    printf("Each block that contains a transient has an infinite send time. ");
+    printf("The resultant array is written to a DATfile.\n\n");
+    return 0;
+  }
+
+  FileWrapper pcm_file = FileWrapper::OpenReadOnly(argv[1]);
+  if (!pcm_file.is_open()) {
+    printf("\nThe %s could not be opened.\n\n", argv[1]);
+    return -1;
+  }
+
+  FileWrapper dat_file = FileWrapper::OpenWriteOnly(argv[2]);
+  if (!dat_file.is_open()) {
+    printf("\nThe %s could not be opened.\n\n", argv[2]);
+    return -1;
+  }
+
+  int chunk_size_ms = atoi(argv[3]);
+  if (chunk_size_ms <= 0) {
+    printf("\nThe chunkSize must be a positive integer\n\n");
+    return -1;
+  }
+
+  int sample_rate_hz = atoi(argv[4]);
+  if (sample_rate_hz <= 0) {
+    printf("\nThe sampleRate must be a positive integer\n\n");
+    return -1;
+  }
+
+  TransientDetector detector(sample_rate_hz);
+  int lost_packets = 0;
+  size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000;
+  std::unique_ptr<float[]> audio_buffer(new float[audio_buffer_length]);
+  std::vector<float> send_times;
+
+  // Read first buffer from the PCM test file.
+ size_t file_samples_read = ReadInt16FromFileToFloatBuffer( + &pcm_file, audio_buffer_length, audio_buffer.get()); + for (int time = 0; file_samples_read > 0; time += chunk_size_ms) { + // Pad the rest of the buffer with zeros. + for (size_t i = file_samples_read; i < audio_buffer_length; ++i) { + audio_buffer[i] = 0.0; + } + float value = + detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0); + if (value < 0.5f) { + value = time; + } else { + value = FLT_MAX; + ++lost_packets; + } + send_times.push_back(value); + + // Read next buffer from the PCM test file. + file_samples_read = ReadInt16FromFileToFloatBuffer( + &pcm_file, audio_buffer_length, audio_buffer.get()); + } + + size_t floats_written = + WriteFloatBufferToFile(&dat_file, send_times.size(), &send_times[0]); + + if (floats_written == 0) { + printf("\nThe send times could not be written to DAT file\n\n"); + return -1; + } + + pcm_file.Close(); + dat_file.Close(); + + return lost_packets; +} diff --git a/third_party/libwebrtc/modules/audio_processing/transient/common.h b/third_party/libwebrtc/modules/audio_processing/transient/common.h new file mode 100644 index 0000000000..63c9a7b315 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/common.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_ +namespace webrtc { +namespace ts { + +static const float kPi = 3.14159265358979323846f; +static const int kChunkSizeMs = 10; +enum { + kSampleRate8kHz = 8000, + kSampleRate16kHz = 16000, + kSampleRate32kHz = 32000, + kSampleRate48kHz = 48000 +}; + +} // namespace ts +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h b/third_party/libwebrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h new file mode 100644 index 0000000000..92233bfd74 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This header file defines the coefficients of the FIR based approximation of +// the Meyer Wavelet +#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_ +#define MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_ + +// Decomposition coefficients Daubechies 8. 
+
+namespace webrtc {
+
+const int kDaubechies8CoefficientsLength = 16;
+
+const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength] = {
+    -5.44158422430816093862e-02f, 3.12871590914465924627e-01f,
+    -6.75630736298012846142e-01f, 5.85354683654869090148e-01f,
+    1.58291052560238926228e-02f,  -2.84015542962428091389e-01f,
+    -4.72484573997972536787e-04f, 1.28747426620186011803e-01f,
+    1.73693010020221083600e-02f,  -4.40882539310647192377e-02f,
+    -1.39810279170155156436e-02f, 8.74609404701565465445e-03f,
+    4.87035299301066034600e-03f,  -3.91740372995977108837e-04f,
+    -6.75449405998556772109e-04f, -1.17476784002281916305e-04f};
+
+const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = {
+    -1.17476784002281916305e-04f, 6.75449405998556772109e-04f,
+    -3.91740372995977108837e-04f, -4.87035299301066034600e-03f,
+    8.74609404701565465445e-03f,  1.39810279170155156436e-02f,
+    -4.40882539310647192377e-02f, -1.73693010020221083600e-02f,
+    1.28747426620186011803e-01f,  4.72484573997972536787e-04f,
+    -2.84015542962428091389e-01f, -1.58291052560238926228e-02f,
+    5.85354683654869090148e-01f,  6.75630736298012846142e-01f,
+    3.12871590914465924627e-01f,  5.44158422430816093862e-02f};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator.h b/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator.h
new file mode 100644
index 0000000000..52467e8c25
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator.h
@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+
+#include <cstdlib>
+
+// Provides a set of static methods to perform dyadic decimations.
+
+namespace webrtc {
+
+// Returns the proper length of the output buffer that you should use for the
+// given `in_length` and decimation `odd_sequence`.
+inline size_t GetOutLengthToDyadicDecimate(size_t in_length,
+                                           bool odd_sequence) {
+  size_t out_length = in_length / 2;
+
+  if (in_length % 2 == 1 && !odd_sequence) {
+    ++out_length;
+  }
+
+  return out_length;
+}
+
+// Performs a dyadic decimation: removes every odd/even member of a sequence,
+// halving its overall length.
+// Arguments:
+//    in: array of `in_length`.
+//    odd_sequence: If false, the odd members will be removed (1, 3, 5, ...);
+//                  if true, the even members will be removed (0, 2, 4, ...).
+//    out: array of `out_length`. `out_length` must be large enough to
+//         hold the decimated output. The necessary length can be provided by
+//         GetOutLengthToDyadicDecimate().
+//         Must be previously allocated.
+// Returns the number of output samples copied, or 0 on error.
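+// For example, with in = {7, 8, 9, 10, 11} (in_length 5), odd_sequence ==
+// false keeps the even-indexed members and writes out = {7, 9, 11}, while
+// odd_sequence == true keeps the odd-indexed members and writes out = {8, 10};
+// both match the lengths reported by GetOutLengthToDyadicDecimate().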
+template <typename T>
+static size_t DyadicDecimate(const T* in,
+                             size_t in_length,
+                             bool odd_sequence,
+                             T* out,
+                             size_t out_length) {
+  size_t half_length = GetOutLengthToDyadicDecimate(in_length, odd_sequence);
+
+  if (!in || !out || in_length <= 0 || out_length < half_length) {
+    return 0;
+  }
+
+  size_t output_samples = 0;
+  size_t index_adjustment = odd_sequence ? 1 : 0;
+  for (output_samples = 0; output_samples < half_length; ++output_samples) {
+    out[output_samples] = in[output_samples * 2 + index_adjustment];
+  }
+
+  return output_samples;
+}
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc
new file mode 100644
index 0000000000..e4776d694f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc
@@ -0,0 +1,111 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/dyadic_decimator.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const size_t kEvenBufferLength = 6;
+static const size_t kOddBufferLength = 5;
+static const size_t kOutBufferLength = 3;
+
+int16_t const test_buffer_even_len[] = {0, 1, 2, 3, 4, 5};
+int16_t const test_buffer_odd_len[] = {0, 1, 2, 3, 4};
+int16_t test_buffer_out[kOutBufferLength];
+
+TEST(DyadicDecimatorTest, GetOutLengthToDyadicDecimate) {
+  EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, false));
+  EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, true));
+  EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(5, false));
+  EXPECT_EQ(2u, GetOutLengthToDyadicDecimate(5, true));
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateErrorValues) {
+  size_t out_samples = 0;
+
+  out_samples = DyadicDecimate(static_cast<int16_t*>(NULL), kEvenBufferLength,
+                               false,  // Even sequence.
+                               test_buffer_out, kOutBufferLength);
+  EXPECT_EQ(0u, out_samples);
+
+  out_samples = DyadicDecimate(test_buffer_even_len, kEvenBufferLength,
+                               false,  // Even sequence.
+                               static_cast<int16_t*>(NULL), kOutBufferLength);
+  EXPECT_EQ(0u, out_samples);
+
+  // Less than required `out_length`.
+  out_samples = DyadicDecimate(test_buffer_even_len, kEvenBufferLength,
+                               false,  // Even sequence.
+                               test_buffer_out, 2);
+  EXPECT_EQ(0u, out_samples);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthEvenSequence) {
+  size_t expected_out_samples =
+      GetOutLengthToDyadicDecimate(kEvenBufferLength, false);
+
+  size_t out_samples = DyadicDecimate(test_buffer_even_len, kEvenBufferLength,
+                                      false,  // Even sequence.
+                                      test_buffer_out, kOutBufferLength);
+
+  EXPECT_EQ(expected_out_samples, out_samples);
+
+  EXPECT_EQ(0, test_buffer_out[0]);
+  EXPECT_EQ(2, test_buffer_out[1]);
+  EXPECT_EQ(4, test_buffer_out[2]);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthOddSequence) {
+  size_t expected_out_samples =
+      GetOutLengthToDyadicDecimate(kEvenBufferLength, true);
+
+  size_t out_samples = DyadicDecimate(test_buffer_even_len, kEvenBufferLength,
+                                      true,  // Odd sequence.
+ test_buffer_out, kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(1, test_buffer_out[0]); + EXPECT_EQ(3, test_buffer_out[1]); + EXPECT_EQ(5, test_buffer_out[2]); +} + +TEST(DyadicDecimatorTest, DyadicDecimateOddLengthEvenSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kOddBufferLength, false); + + size_t out_samples = DyadicDecimate(test_buffer_odd_len, kOddBufferLength, + false, // Even sequence. + test_buffer_out, kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(0, test_buffer_out[0]); + EXPECT_EQ(2, test_buffer_out[1]); + EXPECT_EQ(4, test_buffer_out[2]); +} + +TEST(DyadicDecimatorTest, DyadicDecimateOddLengthOddSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kOddBufferLength, true); + + size_t out_samples = DyadicDecimate(test_buffer_odd_len, kOddBufferLength, + true, // Odd sequence. + test_buffer_out, kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(1, test_buffer_out[0]); + EXPECT_EQ(3, test_buffer_out[1]); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/transient/file_utils.cc b/third_party/libwebrtc/modules/audio_processing/transient/file_utils.cc new file mode 100644 index 0000000000..58f99325d1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/file_utils.cc @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/transient/file_utils.h"
+
+#include <memory>
+
+#include "rtc_base/system/file_wrapper.h"
+
+namespace webrtc {
+
+int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) {
+  if (!bytes || !out) {
+    return -1;
+  }
+
+  uint32_t binary_value = 0;
+  for (int i = 3; i >= 0; --i) {
+    binary_value <<= 8;
+    binary_value += bytes[i];
+  }
+
+  *out = bit_cast<float>(binary_value);
+
+  return 0;
+}
+
+int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) {
+  if (!bytes || !out) {
+    return -1;
+  }
+
+  uint64_t binary_value = 0;
+  for (int i = 7; i >= 0; --i) {
+    binary_value <<= 8;
+    binary_value += bytes[i];
+  }
+
+  *out = bit_cast<double>(binary_value);
+
+  return 0;
+}
+
+int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) {
+  if (!out_bytes) {
+    return -1;
+  }
+
+  uint32_t binary_value = bit_cast<uint32_t>(value);
+  for (size_t i = 0; i < 4; ++i) {
+    out_bytes[i] = binary_value;
+    binary_value >>= 8;
+  }
+
+  return 0;
+}
+
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) {
+  if (!out_bytes) {
+    return -1;
+  }
+
+  uint64_t binary_value = bit_cast<uint64_t>(value);
+  for (size_t i = 0; i < 8; ++i) {
+    out_bytes[i] = binary_value;
+    binary_value >>= 8;
+  }
+
+  return 0;
+}
+
+size_t ReadInt16BufferFromFile(FileWrapper* file,
+                               size_t length,
+                               int16_t* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[2]);
+
+  size_t int16s_read = 0;
+
+  while (int16s_read < length) {
+    size_t bytes_read = file->Read(byte_array.get(), 2);
+    if (bytes_read < 2) {
+      break;
+    }
+    int16_t value = byte_array[1];
+    value <<= 8;
+    value += byte_array[0];
+    buffer[int16s_read] = value;
+    ++int16s_read;
+  }
+
+  return int16s_read;
+}
+
+size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
+                                      size_t length,
+                                      float* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<int16_t[]> buffer16(new int16_t[length]);
+
+  size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
+
+  for (size_t i = 0; i < int16s_read; ++i) {
+    buffer[i] = buffer16[i];
+  }
+
+  return int16s_read;
+}
+
+size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
+                                       size_t length,
+                                       double* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<int16_t[]> buffer16(new int16_t[length]);
+
+  size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
+
+  for (size_t i = 0; i < int16s_read; ++i) {
+    buffer[i] = buffer16[i];
+  }
+
+  return int16s_read;
+}
+
+size_t ReadFloatBufferFromFile(FileWrapper* file,
+                               size_t length,
+                               float* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[4]);
+
+  size_t floats_read = 0;
+
+  while (floats_read < length) {
+    size_t bytes_read = file->Read(byte_array.get(), 4);
+    if (bytes_read < 4) {
+      break;
+    }
+    ConvertByteArrayToFloat(byte_array.get(), &buffer[floats_read]);
+    ++floats_read;
+  }
+
+  return floats_read;
+}
+
+size_t ReadDoubleBufferFromFile(FileWrapper* file,
+                                size_t length,
+                                double* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[8]);
+
+  size_t doubles_read = 0;
+
+  while (doubles_read < length) {
+    size_t bytes_read = file->Read(byte_array.get(), 8);
+    if (bytes_read < 8) {
+      break;
+    }
+    ConvertByteArrayToDouble(byte_array.get(), &buffer[doubles_read]);
+    ++doubles_read;
+  }
+
+  return doubles_read;
+}
+
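As a sanity check on the byte order used throughout this file, the standalone snippet below (an aside, not part of the patch) packs a float into the same least-significant-byte-first layout as ConvertFloatToByteArray and prints the bytes of pi; the expected output, `DB 0F 49 40`, matches the kPiBytesf constant that the unit tests further down rely on.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const float value = 3.14159265f;  // Rounds to IEEE-754 single 0x40490FDB.
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));  // Same idea as bit_cast above.
  uint8_t bytes[4];
  for (int i = 0; i < 4; ++i) {
    bytes[i] = static_cast<uint8_t>(bits & 0xFF);  // LSB first, as in
    bits >>= 8;                                    // ConvertFloatToByteArray.
  }
  std::printf("%02X %02X %02X %02X\n", bytes[0], bytes[1], bytes[2], bytes[3]);
  return 0;
}
```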
+size_t WriteInt16BufferToFile(FileWrapper* file,
+                              size_t length,
+                              const int16_t* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[2]);
+
+  size_t int16s_written = 0;
+
+  for (int16s_written = 0; int16s_written < length; ++int16s_written) {
+    // Get byte representation.
+    byte_array[0] = buffer[int16s_written] & 0xFF;
+    byte_array[1] = (buffer[int16s_written] >> 8) & 0xFF;
+
+    file->Write(byte_array.get(), 2);
+  }
+
+  file->Flush();
+
+  return int16s_written;
+}
+
+size_t WriteFloatBufferToFile(FileWrapper* file,
+                              size_t length,
+                              const float* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[4]);
+
+  size_t floats_written = 0;
+
+  for (floats_written = 0; floats_written < length; ++floats_written) {
+    // Get byte representation.
+    ConvertFloatToByteArray(buffer[floats_written], byte_array.get());
+
+    file->Write(byte_array.get(), 4);
+  }
+
+  file->Flush();
+
+  return floats_written;
+}
+
+size_t WriteDoubleBufferToFile(FileWrapper* file,
+                               size_t length,
+                               const double* buffer) {
+  if (!file || !file->is_open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  std::unique_ptr<uint8_t[]> byte_array(new uint8_t[8]);
+
+  size_t doubles_written = 0;
+
+  for (doubles_written = 0; doubles_written < length; ++doubles_written) {
+    // Get byte representation.
+    ConvertDoubleToByteArray(buffer[doubles_written], byte_array.get());
+
+    file->Write(byte_array.get(), 8);
+  }
+
+  file->Flush();
+
+  return doubles_written;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/file_utils.h b/third_party/libwebrtc/modules/audio_processing/transient/file_utils.h
new file mode 100644
index 0000000000..b748337773
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/file_utils.h
@@ -0,0 +1,117 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+
+#include <string.h>
+
+#include "rtc_base/system/file_wrapper.h"
+
+namespace webrtc {
+
+// This is a copy of the cast included in the Chromium codebase here:
+// http://cs.chromium.org/src/third_party/cld/base/casts.h
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+  // A compile error here means your Dest and Source have different sizes.
+  static_assert(sizeof(Dest) == sizeof(Source),
+                "Dest and Source have different sizes");
+
+  Dest dest;
+  memcpy(&dest, &source, sizeof(dest));
+  return dest;
+}
+
+// Converts the byte array with binary float representation to float.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out);
+
+// Converts the byte array with binary double representation to double.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out);
+
+// Converts a float to a byte array with binary float representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]);
+
+// Converts a double to a byte array with binary double representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]);
+
+// Reads `length` 16-bit integers from `file` to `buffer`.
+// `file` must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16BufferFromFile(FileWrapper* file,
+                               size_t length,
+                               int16_t* buffer);
+
+// Reads `length` 16-bit integers from `file` and stores those values
+// (converting them) in `buffer`.
+// `file` must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
+                                      size_t length,
+                                      float* buffer);
+
+// Reads `length` 16-bit integers from `file` and stores those values
+// (converting them) in `buffer`.
+// `file` must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
+                                       size_t length,
+                                       double* buffer);
+
+// Reads `length` floats in binary representation (4 bytes) from `file` to
+// `buffer`.
+// `file` must be previously opened.
+// Returns the number of floats read, or 0 on error.
+size_t ReadFloatBufferFromFile(FileWrapper* file,
+                               size_t length,
+                               float* buffer);
+
+// Reads `length` doubles in binary representation (8 bytes) from `file` to
+// `buffer`.
+// `file` must be previously opened.
+// Returns the number of doubles read, or 0 on error.
+size_t ReadDoubleBufferFromFile(FileWrapper* file,
+                                size_t length,
+                                double* buffer);
+
+// Writes `length` 16-bit integers from `buffer` in binary representation (2
+// bytes) to `file`. It flushes `file`, so after this call there are no
+// writes pending.
+// `file` must be previously opened.
+// Returns the number of 16-bit integers written, or 0 on error.
+size_t WriteInt16BufferToFile(FileWrapper* file,
+                              size_t length,
+                              const int16_t* buffer);
+
+// Writes `length` floats from `buffer` in binary representation (4 bytes) to
+// `file`. It flushes `file`, so after this call there are no writes pending.
+// `file` must be previously opened.
+// Returns the number of floats written, or 0 on error.
+size_t WriteFloatBufferToFile(FileWrapper* file,
+                              size_t length,
+                              const float* buffer);
+
+// Writes `length` doubles from `buffer` in binary representation (8 bytes) to
+// `file`. It flushes `file`, so after this call there are no writes pending.
+// `file` must be previously opened.
+// Returns the number of doubles written, or 0 on error.
+size_t WriteDoubleBufferToFile(FileWrapper* file,
+                               size_t length,
+                               const double* buffer);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/file_utils_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/file_utils_unittest.cc
new file mode 100644
index 0000000000..a9dddb1eda
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/file_utils_unittest.cc
@@ -0,0 +1,501 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/transient/file_utils.h" + +#include + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "rtc_base/system/file_wrapper.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +static const uint8_t kPiBytesf[4] = {0xDB, 0x0F, 0x49, 0x40}; +static const uint8_t kEBytesf[4] = {0x54, 0xF8, 0x2D, 0x40}; +static const uint8_t kAvogadroBytesf[4] = {0x2F, 0x0C, 0xFF, 0x66}; + +static const uint8_t kPiBytes[8] = {0x18, 0x2D, 0x44, 0x54, + 0xFB, 0x21, 0x09, 0x40}; +static const uint8_t kEBytes[8] = {0x69, 0x57, 0x14, 0x8B, + 0x0A, 0xBF, 0x05, 0x40}; +static const uint8_t kAvogadroBytes[8] = {0xF4, 0xBC, 0xA8, 0xDF, + 0x85, 0xE1, 0xDF, 0x44}; + +static const double kPi = 3.14159265358979323846; +static const double kE = 2.71828182845904523536; +static const double kAvogadro = 602214100000000000000000.0; + +class TransientFileUtilsTest : public ::testing::Test { + protected: + TransientFileUtilsTest() + : kTestFileName( + test::ResourcePath("audio_processing/transient/double-utils", + "dat")), + kTestFileNamef( + test::ResourcePath("audio_processing/transient/float-utils", + "dat")) {} + + ~TransientFileUtilsTest() override { CleanupTempFiles(); } + + std::string CreateTempFilename(absl::string_view dir, + absl::string_view prefix) { + std::string filename = test::TempFilename(dir, prefix); + temp_filenames_.push_back(filename); + return filename; + } + + void CleanupTempFiles() { + for (const std::string& filename : temp_filenames_) { + remove(filename.c_str()); + } + temp_filenames_.clear(); + } + + // This file (used in some tests) contains binary data. The data correspond to + // the double representation of the constants: Pi, E, and the Avogadro's + // Number; + // appended in that order. + const std::string kTestFileName; + + // This file (used in some tests) contains binary data. The data correspond to + // the float representation of the constants: Pi, E, and the Avogadro's + // Number; + // appended in that order. + const std::string kTestFileNamef; + + // List of temporary filenames created by CreateTempFilename. 
+  std::vector<std::string> temp_filenames_;
+};
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ConvertByteArrayToFloat DISABLED_ConvertByteArrayToFloat
+#else
+#define MAYBE_ConvertByteArrayToFloat ConvertByteArrayToFloat
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ConvertByteArrayToFloat) {
+  float value = 0.0;
+
+  EXPECT_EQ(0, ConvertByteArrayToFloat(kPiBytesf, &value));
+  EXPECT_FLOAT_EQ(kPi, value);
+
+  EXPECT_EQ(0, ConvertByteArrayToFloat(kEBytesf, &value));
+  EXPECT_FLOAT_EQ(kE, value);
+
+  EXPECT_EQ(0, ConvertByteArrayToFloat(kAvogadroBytesf, &value));
+  EXPECT_FLOAT_EQ(kAvogadro, value);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ConvertByteArrayToDouble DISABLED_ConvertByteArrayToDouble
+#else
+#define MAYBE_ConvertByteArrayToDouble ConvertByteArrayToDouble
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ConvertByteArrayToDouble) {
+  double value = 0.0;
+
+  EXPECT_EQ(0, ConvertByteArrayToDouble(kPiBytes, &value));
+  EXPECT_DOUBLE_EQ(kPi, value);
+
+  EXPECT_EQ(0, ConvertByteArrayToDouble(kEBytes, &value));
+  EXPECT_DOUBLE_EQ(kE, value);
+
+  EXPECT_EQ(0, ConvertByteArrayToDouble(kAvogadroBytes, &value));
+  EXPECT_DOUBLE_EQ(kAvogadro, value);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ConvertFloatToByteArray DISABLED_ConvertFloatToByteArray
+#else
+#define MAYBE_ConvertFloatToByteArray ConvertFloatToByteArray
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ConvertFloatToByteArray) {
+  std::unique_ptr<uint8_t[]> bytes(new uint8_t[4]);
+
+  EXPECT_EQ(0, ConvertFloatToByteArray(kPi, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kPiBytesf, 4));
+
+  EXPECT_EQ(0, ConvertFloatToByteArray(kE, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kEBytesf, 4));
+
+  EXPECT_EQ(0, ConvertFloatToByteArray(kAvogadro, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytesf, 4));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ConvertDoubleToByteArray DISABLED_ConvertDoubleToByteArray
+#else
+#define MAYBE_ConvertDoubleToByteArray ConvertDoubleToByteArray
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ConvertDoubleToByteArray) {
+  std::unique_ptr<uint8_t[]> bytes(new uint8_t[8]);
+
+  EXPECT_EQ(0, ConvertDoubleToByteArray(kPi, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kPiBytes, 8));
+
+  EXPECT_EQ(0, ConvertDoubleToByteArray(kE, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kEBytes, 8));
+
+  EXPECT_EQ(0, ConvertDoubleToByteArray(kAvogadro, bytes.get()));
+  EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytes, 8));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadInt16BufferFromFile DISABLED_ReadInt16BufferFromFile
+#else
+#define MAYBE_ReadInt16BufferFromFile ReadInt16BufferFromFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16BufferFromFile) {
+  std::string test_filename = kTestFileName;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  const size_t kBufferLength = 12;
+  std::unique_ptr<int16_t[]> buffer(new int16_t[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength,
+            ReadInt16BufferFromFile(&file, kBufferLength, buffer.get()));
+  EXPECT_EQ(22377, buffer[4]);
+  EXPECT_EQ(16389, buffer[7]);
+  EXPECT_EQ(17631, buffer[kBufferLength - 1]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file contains less data than
+  // requested; the read stops at the end of the file and returns the number
+  // of int16s actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new int16_t[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadInt16BufferFromFile(&file, kBufferLengthLargerThanFile,
+                                    buffer.get()));
+  EXPECT_EQ(11544, buffer[0]);
+  EXPECT_EQ(22377, buffer[4]);
+  EXPECT_EQ(16389, buffer[7]);
+  EXPECT_EQ(17631, buffer[kBufferLength - 1]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadInt16FromFileToFloatBuffer \
+  DISABLED_ReadInt16FromFileToFloatBuffer
+#else
+#define MAYBE_ReadInt16FromFileToFloatBuffer ReadInt16FromFileToFloatBuffer
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16FromFileToFloatBuffer) {
+  std::string test_filename = kTestFileName;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  const size_t kBufferLength = 12;
+  std::unique_ptr<float[]> buffer(new float[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength,
+            ReadInt16FromFileToFloatBuffer(&file, kBufferLength, buffer.get()));
+
+  EXPECT_DOUBLE_EQ(11544, buffer[0]);
+  EXPECT_DOUBLE_EQ(22377, buffer[4]);
+  EXPECT_DOUBLE_EQ(16389, buffer[7]);
+  EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file contains less data than
+  // requested; the read stops at the end of the file and returns the number
+  // of int16s actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new float[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadInt16FromFileToFloatBuffer(&file, kBufferLengthLargerThanFile,
+                                           buffer.get()));
+  EXPECT_DOUBLE_EQ(11544, buffer[0]);
+  EXPECT_DOUBLE_EQ(22377, buffer[4]);
+  EXPECT_DOUBLE_EQ(16389, buffer[7]);
+  EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadInt16FromFileToDoubleBuffer \
+  DISABLED_ReadInt16FromFileToDoubleBuffer
+#else
+#define MAYBE_ReadInt16FromFileToDoubleBuffer ReadInt16FromFileToDoubleBuffer
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16FromFileToDoubleBuffer) {
+  std::string test_filename = kTestFileName;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  const size_t kBufferLength = 12;
+  std::unique_ptr<double[]> buffer(new double[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength, ReadInt16FromFileToDoubleBuffer(&file, kBufferLength,
+                                                           buffer.get()));
+  EXPECT_DOUBLE_EQ(11544, buffer[0]);
+  EXPECT_DOUBLE_EQ(22377, buffer[4]);
+  EXPECT_DOUBLE_EQ(16389, buffer[7]);
+  EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file contains less data than
+  // requested; the read stops at the end of the file and returns the number
+  // of int16s actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new double[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadInt16FromFileToDoubleBuffer(&file, kBufferLengthLargerThanFile,
+                                            buffer.get()));
+  EXPECT_DOUBLE_EQ(11544, buffer[0]);
+  EXPECT_DOUBLE_EQ(22377, buffer[4]);
+  EXPECT_DOUBLE_EQ(16389, buffer[7]);
+  EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadFloatBufferFromFile DISABLED_ReadFloatBufferFromFile
+#else
+#define MAYBE_ReadFloatBufferFromFile ReadFloatBufferFromFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadFloatBufferFromFile) {
+  std::string test_filename = kTestFileNamef;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileNamef.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<float[]> buffer(new float[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength,
+            ReadFloatBufferFromFile(&file, kBufferLength, buffer.get()));
+  EXPECT_FLOAT_EQ(kPi, buffer[0]);
+  EXPECT_FLOAT_EQ(kE, buffer[1]);
+  EXPECT_FLOAT_EQ(kAvogadro, buffer[2]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file contains less data than
+  // requested; the read stops at the end of the file and returns the number
+  // of floats actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new float[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadFloatBufferFromFile(&file, kBufferLengthLargerThanFile,
+                                    buffer.get()));
+  EXPECT_FLOAT_EQ(kPi, buffer[0]);
+  EXPECT_FLOAT_EQ(kE, buffer[1]);
+  EXPECT_FLOAT_EQ(kAvogadro, buffer[2]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ReadDoubleBufferFromFile DISABLED_ReadDoubleBufferFromFile
+#else
+#define MAYBE_ReadDoubleBufferFromFile ReadDoubleBufferFromFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ReadDoubleBufferFromFile) {
+  std::string test_filename = kTestFileName;
+
+  FileWrapper file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<double[]> buffer(new double[kBufferLength]);
+
+  EXPECT_EQ(kBufferLength,
+            ReadDoubleBufferFromFile(&file, kBufferLength, buffer.get()));
+  EXPECT_DOUBLE_EQ(kPi, buffer[0]);
+  EXPECT_DOUBLE_EQ(kE, buffer[1]);
+  EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]);
+
+  file.Rewind();
+
+  // The next test checks the case where the file contains less data than
+  // requested; the read stops at the end of the file and returns the number
+  // of doubles actually read.
+  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
+  buffer.reset(new double[kBufferLengthLargerThanFile]);
+  EXPECT_EQ(kBufferLength,
+            ReadDoubleBufferFromFile(&file, kBufferLengthLargerThanFile,
+                                     buffer.get()));
+  EXPECT_DOUBLE_EQ(kPi, buffer[0]);
+  EXPECT_DOUBLE_EQ(kE, buffer[1]);
+  EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]);
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_WriteInt16BufferToFile DISABLED_WriteInt16BufferToFile
+#else
+#define MAYBE_WriteInt16BufferToFile WriteInt16BufferToFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_WriteInt16BufferToFile) {
+  std::string kOutFileName =
+      CreateTempFilename(test::OutputPath(), "utils_test");
+
+  FileWrapper file = FileWrapper::OpenWriteOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<int16_t[]> written_buffer(new int16_t[kBufferLength]);
+  std::unique_ptr<int16_t[]> read_buffer(new int16_t[kBufferLength]);
+
+  written_buffer[0] = 1;
+  written_buffer[1] = 2;
+  written_buffer[2] = 3;
+
+  EXPECT_EQ(kBufferLength,
+            WriteInt16BufferToFile(&file, kBufferLength, written_buffer.get()));
+
+  file.Close();
+
+  file = FileWrapper::OpenReadOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  EXPECT_EQ(kBufferLength,
+            ReadInt16BufferFromFile(&file, kBufferLength, read_buffer.get()));
+  EXPECT_EQ(0, memcmp(written_buffer.get(), read_buffer.get(),
+                      kBufferLength * sizeof(written_buffer[0])));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_WriteFloatBufferToFile DISABLED_WriteFloatBufferToFile
+#else
+#define MAYBE_WriteFloatBufferToFile WriteFloatBufferToFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_WriteFloatBufferToFile) {
+  std::string kOutFileName =
+      CreateTempFilename(test::OutputPath(), "utils_test");
+
+  FileWrapper file = FileWrapper::OpenWriteOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<float[]> written_buffer(new float[kBufferLength]);
+  std::unique_ptr<float[]> read_buffer(new float[kBufferLength]);
+
+  written_buffer[0] = static_cast<float>(kPi);
+  written_buffer[1] = static_cast<float>(kE);
+  written_buffer[2] = static_cast<float>(kAvogadro);
+
+  EXPECT_EQ(kBufferLength,
+            WriteFloatBufferToFile(&file, kBufferLength, written_buffer.get()));
+
+  file.Close();
+
+  file = FileWrapper::OpenReadOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  EXPECT_EQ(kBufferLength,
+            ReadFloatBufferFromFile(&file, kBufferLength, read_buffer.get()));
+  EXPECT_EQ(0, memcmp(written_buffer.get(), read_buffer.get(),
+                      kBufferLength * sizeof(written_buffer[0])));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_WriteDoubleBufferToFile DISABLED_WriteDoubleBufferToFile
+#else
+#define MAYBE_WriteDoubleBufferToFile WriteDoubleBufferToFile
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_WriteDoubleBufferToFile) {
+  std::string kOutFileName =
+      CreateTempFilename(test::OutputPath(), "utils_test");
+
+  FileWrapper file = FileWrapper::OpenWriteOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  const size_t kBufferLength = 3;
+  std::unique_ptr<double[]> written_buffer(new double[kBufferLength]);
+  std::unique_ptr<double[]> read_buffer(new double[kBufferLength]);
+
+  written_buffer[0] = kPi;
+  written_buffer[1] = kE;
+  written_buffer[2] = kAvogadro;
+
+  EXPECT_EQ(kBufferLength, WriteDoubleBufferToFile(&file, kBufferLength,
+                                                   written_buffer.get()));
+
+  file.Close();
+
+  file = FileWrapper::OpenReadOnly(kOutFileName);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kOutFileName.c_str();
+
+  EXPECT_EQ(kBufferLength,
+            ReadDoubleBufferFromFile(&file, kBufferLength, read_buffer.get()));
+  EXPECT_EQ(0, memcmp(written_buffer.get(), read_buffer.get(),
+                      kBufferLength * sizeof(written_buffer[0])));
+}
+
+#if defined(WEBRTC_IOS)
+#define MAYBE_ExpectedErrorReturnValues DISABLED_ExpectedErrorReturnValues
+#else
+#define MAYBE_ExpectedErrorReturnValues ExpectedErrorReturnValues
+#endif
+TEST_F(TransientFileUtilsTest, MAYBE_ExpectedErrorReturnValues) {
+  std::string test_filename = kTestFileName;
+
+  double value;
+  std::unique_ptr<int16_t[]> int16_buffer(new int16_t[1]);
+  std::unique_ptr<double[]> double_buffer(new double[1]);
+  FileWrapper file;
+
+  EXPECT_EQ(-1, ConvertByteArrayToDouble(NULL, &value));
+  EXPECT_EQ(-1, ConvertByteArrayToDouble(kPiBytes, NULL));
+
+  EXPECT_EQ(-1, ConvertDoubleToByteArray(kPi, NULL));
+
+  // Tests with file not opened.
+  EXPECT_EQ(0u, ReadInt16BufferFromFile(&file, 1, int16_buffer.get()));
+  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(&file, 1, double_buffer.get()));
+  EXPECT_EQ(0u, ReadDoubleBufferFromFile(&file, 1, double_buffer.get()));
+  EXPECT_EQ(0u, WriteInt16BufferToFile(&file, 1, int16_buffer.get()));
+  EXPECT_EQ(0u, WriteDoubleBufferToFile(&file, 1, double_buffer.get()));
+
+  file = FileWrapper::OpenReadOnly(test_filename);
+  ASSERT_TRUE(file.is_open()) << "File could not be opened:\n"
+                              << kTestFileName.c_str();
+
+  EXPECT_EQ(0u, ReadInt16BufferFromFile(NULL, 1, int16_buffer.get()));
+  EXPECT_EQ(0u, ReadInt16BufferFromFile(&file, 1, NULL));
+  EXPECT_EQ(0u, ReadInt16BufferFromFile(&file, 0, int16_buffer.get()));
+
+  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(NULL, 1, double_buffer.get()));
+  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(&file, 1, NULL));
+  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(&file, 0, double_buffer.get()));
+
+  EXPECT_EQ(0u, ReadDoubleBufferFromFile(NULL, 1, double_buffer.get()));
+  EXPECT_EQ(0u, ReadDoubleBufferFromFile(&file, 1, NULL));
+  EXPECT_EQ(0u, ReadDoubleBufferFromFile(&file, 0, double_buffer.get()));
+
+  EXPECT_EQ(0u, WriteInt16BufferToFile(NULL, 1, int16_buffer.get()));
+  EXPECT_EQ(0u, WriteInt16BufferToFile(&file, 1, NULL));
+  EXPECT_EQ(0u, WriteInt16BufferToFile(&file, 0, int16_buffer.get()));
+
+  EXPECT_EQ(0u, WriteDoubleBufferToFile(NULL, 1, double_buffer.get()));
+  EXPECT_EQ(0u, WriteDoubleBufferToFile(&file, 1, NULL));
+  EXPECT_EQ(0u, WriteDoubleBufferToFile(&file, 0, double_buffer.get()));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc
new file mode 100644
index 0000000000..83810bfe3c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc
@@ -0,0 +1,50 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/moving_moments.h"
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+MovingMoments::MovingMoments(size_t length)
+    : length_(length), queue_(), sum_(0.0), sum_of_squares_(0.0) {
+  RTC_DCHECK_GT(length, 0);
+  for (size_t i = 0; i < length; ++i) {
+    queue_.push(0.0);
+  }
+}
+
+MovingMoments::~MovingMoments() {}
+
+void MovingMoments::CalculateMoments(const float* in,
+                                     size_t in_length,
+                                     float* first,
+                                     float* second) {
+  RTC_DCHECK(in);
+  RTC_DCHECK_GT(in_length, 0);
+  RTC_DCHECK(first);
+  RTC_DCHECK(second);
+
+  for (size_t i = 0; i < in_length; ++i) {
+    const float old_value = queue_.front();
+    queue_.pop();
+    queue_.push(in[i]);
+
+    sum_ += in[i] - old_value;
+    sum_of_squares_ += in[i] * in[i] - old_value * old_value;
+    first[i] = sum_ / length_;
+    second[i] = std::max(0.f, sum_of_squares_ / length_);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.h b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.h
new file mode 100644
index 0000000000..70451dcb71
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.h
@@ -0,0 +1,53 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+
+#include <stddef.h>
+
+#include <queue>
+
+namespace webrtc {
+
+// Calculates the first and second moments for each value of a buffer taking
+// into account a given number of previous values.
+// It preserves its state, so it can be called multiple times.
+// TODO(chadan): Implement a function that takes a buffer of first moments and
+// a buffer of second moments and calculates the variances, when needed.
+// TODO(chadan): Add functionality to update with a buffer but only output the
+// last values of the moments, when needed.
+class MovingMoments {
+ public:
+  // Creates a Moving Moments object, that uses the last `length` values
+  // (including the new value introduced in every new calculation).
+  explicit MovingMoments(size_t length);
+  ~MovingMoments();
+
+  // Calculates the new values using `in`. Results will be in the out buffers.
+  // `first` and `second` must be allocated with at least `in_length`.
+  void CalculateMoments(const float* in,
+                        size_t in_length,
+                        float* first,
+                        float* second);
+
+ private:
+  size_t length_;
+  // A queue holding the `length_` latest input values.
+  std::queue<float> queue_;
+  // Sum of the values of the queue.
+  float sum_;
+  // Sum of the squares of the values of the queue.
+  float sum_of_squares_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/moving_moments_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments_unittest.cc
new file mode 100644
index 0000000000..b0e613e7ab
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/moving_moments_unittest.cc
@@ -0,0 +1,207 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/moving_moments.h"
+
+#include <memory>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const float kTolerance = 0.0001f;
+
+class MovingMomentsTest : public ::testing::Test {
+ protected:
+  static const size_t kMovingMomentsBufferLength = 5;
+  static const size_t kMaxOutputLength = 20;  // Valid for these tests only.
+
+  virtual void SetUp();
+  // Calls CalculateMoments and verifies that it produces the expected
+  // outputs.
+  void CalculateMomentsAndVerify(const float* input,
+                                 size_t input_length,
+                                 const float* expected_mean,
+                                 const float* expected_mean_squares);
+
+  std::unique_ptr<MovingMoments> moving_moments_;
+  float output_mean_[kMaxOutputLength];
+  float output_mean_squares_[kMaxOutputLength];
+};
+
+const size_t MovingMomentsTest::kMaxOutputLength;
+
+void MovingMomentsTest::SetUp() {
+  moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+}
+
+void MovingMomentsTest::CalculateMomentsAndVerify(
+    const float* input,
+    size_t input_length,
+    const float* expected_mean,
+    const float* expected_mean_squares) {
+  ASSERT_LE(input_length, kMaxOutputLength);
+
+  moving_moments_->CalculateMoments(input, input_length, output_mean_,
+                                    output_mean_squares_);
+
+  for (size_t i = 1; i < input_length; ++i) {
+    EXPECT_NEAR(expected_mean[i], output_mean_[i], kTolerance);
+    EXPECT_NEAR(expected_mean_squares[i], output_mean_squares_[i], kTolerance);
+  }
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnAllZerosBuffer) {
+  const float kInput[] = {0.f, 0.f, 0.f, 0.f, 0.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
+  const float expected_mean_squares[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAConstantBuffer) {
+  const float kInput[] = {5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {1.f, 2.f, 3.f, 4.f, 5.f,
+                                             5.f, 5.f, 5.f, 5.f, 5.f};
+  const float expected_mean_squares[kInputLength] = {
+      5.f, 10.f, 15.f, 20.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnIncreasingBuffer) {
+  const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {0.2f, 0.6f, 1.2f, 2.f, 3.f,
+                                             4.f,  5.f,  6.f,  7.f};
+  const float expected_mean_squares[kInputLength] = {
+      0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfADecreasingBuffer) {
+  const float kInput[] = {-1.f, -2.f, -3.f, -4.f, -5.f,
+                          -6.f, -7.f, -8.f, -9.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {-0.2f, -0.6f, -1.2f, -2.f, -3.f,
+                                             -4.f,  -5.f,  -6.f,  -7.f};
+  const float
expected_mean_squares[kInputLength] = { + 0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f}; + + CalculateMomentsAndVerify(kInput, kInputLength, expected_mean, + expected_mean_squares); +} + +TEST_F(MovingMomentsTest, CorrectMomentsOfAZeroMeanSequence) { + const size_t kMovingMomentsBufferLength = 4; + moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength)); + const float kInput[] = {1.f, -1.f, 1.f, -1.f, 1.f, + -1.f, 1.f, -1.f, 1.f, -1.f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + const float expected_mean[kInputLength] = {0.25f, 0.f, 0.25f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f}; + const float expected_mean_squares[kInputLength] = { + 0.25f, 0.5f, 0.75f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}; + + CalculateMomentsAndVerify(kInput, kInputLength, expected_mean, + expected_mean_squares); +} + +TEST_F(MovingMomentsTest, CorrectMomentsOfAnArbitraryBuffer) { + const float kInput[] = {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, + 0.13f, 0.17f, 0.19f, 0.23f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + const float expected_mean[kInputLength] = { + 0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f}; + const float expected_mean_squares[kInputLength] = {0.008f, 0.026f, 0.076f, + 0.174f, 0.1764f, 0.1718f, + 0.1596f, 0.1168f, 0.0294f}; + + CalculateMomentsAndVerify(kInput, kInputLength, expected_mean, + expected_mean_squares); +} + +TEST_F(MovingMomentsTest, MutipleCalculateMomentsCalls) { + const float kInputFirstCall[] = {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, + 0.13f, 0.17f, 0.19f, 0.23f}; + const size_t kInputFirstCallLength = + sizeof(kInputFirstCall) / sizeof(kInputFirstCall[0]); + const float kInputSecondCall[] = {0.29f, 0.31f}; + const size_t kInputSecondCallLength = + sizeof(kInputSecondCall) / sizeof(kInputSecondCall[0]); + const float kInputThirdCall[] = {0.37f, 0.41f, 0.43f, 0.47f}; + const size_t kInputThirdCallLength = + sizeof(kInputThirdCall) / sizeof(kInputThirdCall[0]); + + const float expected_mean_first_call[kInputFirstCallLength] = { + 0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f}; + const float expected_mean_squares_first_call[kInputFirstCallLength] = { + 0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, + 0.1718f, 0.1596f, 0.1168f, 0.0294f}; + + const float expected_mean_second_call[kInputSecondCallLength] = {0.202f, + 0.238f}; + const float expected_mean_squares_second_call[kInputSecondCallLength] = { + 0.0438f, 0.0596f}; + + const float expected_mean_third_call[kInputThirdCallLength] = { + 0.278f, 0.322f, 0.362f, 0.398f}; + const float expected_mean_squares_third_call[kInputThirdCallLength] = { + 0.0812f, 0.1076f, 0.134f, 0.1614f}; + + CalculateMomentsAndVerify(kInputFirstCall, kInputFirstCallLength, + expected_mean_first_call, + expected_mean_squares_first_call); + + CalculateMomentsAndVerify(kInputSecondCall, kInputSecondCallLength, + expected_mean_second_call, + expected_mean_squares_second_call); + + CalculateMomentsAndVerify(kInputThirdCall, kInputThirdCallLength, + expected_mean_third_call, + expected_mean_squares_third_call); +} + +TEST_F(MovingMomentsTest, VerifySampleBasedVsBlockBasedCalculation) { + const float kInput[] = {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, + 0.13f, 0.17f, 0.19f, 0.23f}; + const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]); + + float output_mean_block_based[kInputLength]; + float output_mean_squares_block_based[kInputLength]; + + float output_mean_sample_based; + float output_mean_squares_sample_based; + + moving_moments_->CalculateMoments(kInput, kInputLength, + 
output_mean_block_based, + output_mean_squares_block_based); + moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength)); + for (size_t i = 0; i < kInputLength; ++i) { + moving_moments_->CalculateMoments(&kInput[i], 1, &output_mean_sample_based, + &output_mean_squares_sample_based); + EXPECT_FLOAT_EQ(output_mean_block_based[i], output_mean_sample_based); + EXPECT_FLOAT_EQ(output_mean_squares_block_based[i], + output_mean_squares_sample_based); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/transient/test/plotDetection.m b/third_party/libwebrtc/modules/audio_processing/transient/test/plotDetection.m new file mode 100644 index 0000000000..8e12ab920b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/test/plotDetection.m @@ -0,0 +1,22 @@ +% +% Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +% +% Use of this source code is governed by a BSD-style license +% that can be found in the LICENSE file in the root of the source +% tree. An additional intellectual property rights grant can be found +% in the file PATENTS. All contributing project authors may +% be found in the AUTHORS file in the root of the source tree. +% + +function [] = plotDetection(PCMfile, DATfile, fs, chunkSize) +%[] = plotDetection(PCMfile, DATfile, fs, chunkSize) +% +%Plots the signal alongside the detection values. +% +%PCMfile: The file of the input signal in PCM format. +%DATfile: The file containing the detection values in binary float format. +%fs: The sample rate of the signal in Hertz. +%chunkSize: The chunk size used to compute the detection values in seconds. +[x, tx] = readPCM(PCMfile, fs); +[d, td] = readDetection(DATfile, fs, chunkSize); +plot(tx, x, td, d); diff --git a/third_party/libwebrtc/modules/audio_processing/transient/test/readDetection.m b/third_party/libwebrtc/modules/audio_processing/transient/test/readDetection.m new file mode 100644 index 0000000000..832bf31ec8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/test/readDetection.m @@ -0,0 +1,26 @@ +% +% Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +% +% Use of this source code is governed by a BSD-style license +% that can be found in the LICENSE file in the root of the source +% tree. An additional intellectual property rights grant can be found +% in the file PATENTS. All contributing project authors may +% be found in the AUTHORS file in the root of the source tree. +% + +function [d, t] = readDetection(file, fs, chunkSize) +%[d, t] = readDetection(file, fs, chunkSize) +% +%Reads a detection signal from a DAT file. +% +%d: The detection signal. +%t: The respective time vector. +% +%file: The DAT file where the detection signal is stored in float format. +%fs: The signal sample rate in Hertz. +%chunkSize: The chunk size used for the detection in seconds. +fid = fopen(file); +d = fread(fid, inf, 'float'); +fclose(fid); +t = 0:(1 / fs):(length(d) * chunkSize - 1 / fs); +d = d(floor(t / chunkSize) + 1); diff --git a/third_party/libwebrtc/modules/audio_processing/transient/test/readPCM.m b/third_party/libwebrtc/modules/audio_processing/transient/test/readPCM.m new file mode 100644 index 0000000000..cd3cef8a3c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/test/readPCM.m @@ -0,0 +1,26 @@ +% +% Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 
+%
+% Use of this source code is governed by a BSD-style license
+% that can be found in the LICENSE file in the root of the source
+% tree. An additional intellectual property rights grant can be found
+% in the file PATENTS. All contributing project authors may
+% be found in the AUTHORS file in the root of the source tree.
+%
+
+function [x, t] = readPCM(file, fs)
+%[x, t] = readPCM(file, fs)
+%
+%Reads a signal from a PCM file.
+%
+%x: The read signal after normalization.
+%t: The respective time vector.
+%
+%file: The PCM file where the signal is stored in int16 format.
+%fs: The signal sample rate in Hertz.
+fid = fopen(file);
+x = fread(fid, inf, 'int16');
+fclose(fid);
+x = x - mean(x);
+x = x / max(abs(x));
+t = 0:(1 / fs):((length(x) - 1) / fs);
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc
new file mode 100644
index 0000000000..5c35505368
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/transient_detector.h"
+
+#include <float.h>
+#include <math.h>
+
+#include <algorithm>
+#include <cmath>
+
+#include "modules/audio_processing/transient/common.h"
+#include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
+#include "modules/audio_processing/transient/moving_moments.h"
+#include "modules/audio_processing/transient/wpd_node.h"
+#include "modules/audio_processing/transient/wpd_tree.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+static const int kTransientLengthMs = 30;
+static const int kChunksAtStartupLeftToDelete =
+    kTransientLengthMs / ts::kChunkSizeMs;
+static const float kDetectThreshold = 16.f;
+
+TransientDetector::TransientDetector(int sample_rate_hz)
+    : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
+      last_first_moment_(),
+      last_second_moment_(),
+      chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
+      reference_energy_(1.f),
+      using_reference_(false) {
+  RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz ||
+             sample_rate_hz == ts::kSampleRate16kHz ||
+             sample_rate_hz == ts::kSampleRate32kHz ||
+             sample_rate_hz == ts::kSampleRate48kHz);
+  int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
+  // Adjustment to avoid data loss while downsampling, making
+  // `samples_per_chunk_` and `samples_per_transient` always divisible by
+  // `kLeaves`.
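  // Illustrative arithmetic, assuming ts::kChunkSizeMs == 10 (an assumption
  // from the transient-suppression common constants, not restated here): a
  // 16 kHz stream gives samples_per_chunk_ = 16000 * 10 / 1000 = 160 and
  // samples_per_transient = 16000 * 30 / 1000 = 480. With kLeaves == 8 both
  // are already multiples of 8, so the rounding below acts as a safety net
  // for rates or chunk sizes whose products do not divide evenly.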
+  samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
+  samples_per_transient -= samples_per_transient % kLeaves;
+
+  tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
+  wpd_tree_.reset(new WPDTree(samples_per_chunk_,
+                              kDaubechies8HighPassCoefficients,
+                              kDaubechies8LowPassCoefficients,
+                              kDaubechies8CoefficientsLength, kLevels));
+  for (size_t i = 0; i < kLeaves; ++i) {
+    moving_moments_[i].reset(
+        new MovingMoments(samples_per_transient / kLeaves));
+  }
+
+  first_moments_.reset(new float[tree_leaves_data_length_]);
+  second_moments_.reset(new float[tree_leaves_data_length_]);
+
+  for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) {
+    previous_results_.push_back(0.f);
+  }
+}
+
+TransientDetector::~TransientDetector() {}
+
+float TransientDetector::Detect(const float* data,
+                                size_t data_length,
+                                const float* reference_data,
+                                size_t reference_length) {
+  RTC_DCHECK(data);
+  RTC_DCHECK_EQ(samples_per_chunk_, data_length);
+
+  // TODO(aluebs): Check if these errors can logically happen and if not assert
+  // on them.
+  if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
+    return -1.f;
+  }
+
+  float result = 0.f;
+
+  for (size_t i = 0; i < kLeaves; ++i) {
+    WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
+
+    moving_moments_[i]->CalculateMoments(leaf->data(), tree_leaves_data_length_,
+                                         first_moments_.get(),
+                                         second_moments_.get());
+
+    // Add the delayed value (use the last moments from the last call to
+    // Detect).
+    float unbiased_data = leaf->data()[0] - last_first_moment_[i];
+    result +=
+        unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
+
+    // Add new values.
+    for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
+      unbiased_data = leaf->data()[j] - first_moments_[j - 1];
+      result +=
+          unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
+    }
+
+    last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
+    last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
+  }
+
+  result /= tree_leaves_data_length_;
+
+  result *= ReferenceDetectionValue(reference_data, reference_length);
+
+  if (chunks_at_startup_left_to_delete_ > 0) {
+    chunks_at_startup_left_to_delete_--;
+    result = 0.f;
+  }
+
+  if (result >= kDetectThreshold) {
+    result = 1.f;
+  } else {
+    // Get a proportional value.
+    // The proportion is achieved with a squared raised cosine function with
+    // domain [0, kDetectThreshold) and image [0, 1); it is always increasing.
+    const float horizontal_scaling = ts::kPi / kDetectThreshold;
+    const float kHorizontalShift = ts::kPi;
+    const float kVerticalScaling = 0.5f;
+    const float kVerticalShift = 1.f;
+
+    result = (std::cos(result * horizontal_scaling + kHorizontalShift) +
+              kVerticalShift) *
+             kVerticalScaling;
+    result *= result;
+  }
+
+  previous_results_.pop_front();
+  previous_results_.push_back(result);
+
+  // In the current implementation we return the max of the current result and
+  // the previous results, so the high results have a width equal to
+  // `transient_length`.
+  return *std::max_element(previous_results_.begin(), previous_results_.end());
+}
+
+// Looks for the highest slope and compares it with the previous ones.
+// An exponential transformation takes this to the [0, 1] range. This value is
+// multiplied by the detection result to avoid false positives.
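// Illustrative numbers for that mapping, using the constants defined in the
// function body below (kReferenceNonLinearity = 20.f,
// kEnergyRatioThreshold = 0.2f):
//   result = 1 / (1 + exp(20 * (0.2 - reference_energy / reference_energy_)))
// is ~0.018 for a tiny but nonzero reference energy, 0.5 when the energy
// ratio sits exactly at the 0.2 threshold, and approaches 1 once the
// reference energy clearly exceeds 0.2 * reference_energy_.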
+float TransientDetector::ReferenceDetectionValue(const float* data,
+                                                 size_t length) {
+  if (data == NULL) {
+    using_reference_ = false;
+    return 1.f;
+  }
+  static const float kEnergyRatioThreshold = 0.2f;
+  static const float kReferenceNonLinearity = 20.f;
+  static const float kMemory = 0.99f;
+  float reference_energy = 0.f;
+  for (size_t i = 1; i < length; ++i) {
+    reference_energy += data[i] * data[i];
+  }
+  if (reference_energy == 0.f) {
+    using_reference_ = false;
+    return 1.f;
+  }
+  RTC_DCHECK_NE(0, reference_energy_);
+  float result = 1.f / (1.f + std::exp(kReferenceNonLinearity *
+                                       (kEnergyRatioThreshold -
+                                        reference_energy / reference_energy_)));
+  reference_energy_ =
+      kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
+
+  using_reference_ = true;
+
+  return result;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.h b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.h
new file mode 100644
index 0000000000..a3dbb7ffde
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+
+#include <stddef.h>
+
+#include <deque>
+#include <memory>
+
+#include "modules/audio_processing/transient/moving_moments.h"
+#include "modules/audio_processing/transient/wpd_tree.h"
+
+namespace webrtc {
+
+// This is an implementation of the transient detector described in "Causal
+// Wavelet based transient detector".
+// Calculates the log-likelihood of a transient to happen on a signal at any
+// given time based on the previous samples; it uses a WPD tree to analyze the
+// signal. It preserves its state, so it can be called multiple times.
+class TransientDetector {
+ public:
+  // TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree
+  // of 3 levels. Make an overloaded constructor to allow different wavelets
+  // and depths of the tree, when needed.
+
+  // Creates a wavelet-based transient detector.
+  TransientDetector(int sample_rate_hz);
+
+  ~TransientDetector();
+
+  // Calculates the log-likelihood of the existence of a transient in `data`.
+  // `data_length` has to be equal to `samples_per_chunk_`.
+  // Returns a value between 0 and 1, as a non-linear representation of this
+  // likelihood.
+  // Returns a negative value on error.
+  float Detect(const float* data,
+               size_t data_length,
+               const float* reference_data,
+               size_t reference_length);
+
+  bool using_reference() { return using_reference_; }
+
+ private:
+  float ReferenceDetectionValue(const float* data, size_t length);
+
+  static const size_t kLevels = 3;
+  static const size_t kLeaves = 1 << kLevels;
+
+  size_t samples_per_chunk_;
+
+  std::unique_ptr<WPDTree> wpd_tree_;
+  size_t tree_leaves_data_length_;
+
+  // A MovingMoments object is needed for each leaf in the WPD tree.
+  std::unique_ptr<MovingMoments> moving_moments_[kLeaves];
+
+  std::unique_ptr<float[]> first_moments_;
+  std::unique_ptr<float[]> second_moments_;
+
+  // Stores the last calculated moments from the previous detection.
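  // They seed the statistic for the first sample of the next chunk; as an
  // illustrative sketch (mirroring the Detect() implementation above), each
  // leaf i first adds
  //   unbiased = x[0] - last_first_moment_[i];
  //   result += unbiased * unbiased / (last_second_moment_[i] + FLT_MIN);
  // before the freshly computed moments cover the remaining samples.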
+  float last_first_moment_[kLeaves];
+  float last_second_moment_[kLeaves];
+
+  // We keep track of the previous results from the previous chunks, so they
+  // can be used to effectively give results according to the
+  // `transient_length`.
+  std::deque<float> previous_results_;
+
+  // Number of chunks that are going to return only zeros at the beginning of
+  // the detection. It helps to avoid infs and nans due to the lack of
+  // information.
+  int chunks_at_startup_left_to_delete_;
+
+  float reference_energy_;
+
+  bool using_reference_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector_unittest.cc
new file mode 100644
index 0000000000..a7364626fd
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_detector_unittest.cc
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/transient_detector.h"
+
+#include <memory>
+#include <string>
+
+#include "modules/audio_processing/transient/common.h"
+#include "modules/audio_processing/transient/file_utils.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/system/file_wrapper.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+static const int kSampleRatesHz[] = {ts::kSampleRate8kHz, ts::kSampleRate16kHz,
+                                     ts::kSampleRate32kHz,
+                                     ts::kSampleRate48kHz};
+static const size_t kNumberOfSampleRates =
+    sizeof(kSampleRatesHz) / sizeof(*kSampleRatesHz);
+
+// This test is for the correctness of the transient detector.
+// Checks the results by comparing them with the ones stored in the detect
+// files in the directory resources/audio_processing/transient/.
+// The files contain all the results in double precision (little endian).
+// The audio files used with different sample rates are stored in the same
+// directory.
+#if defined(WEBRTC_IOS)
+TEST(TransientDetectorTest, DISABLED_CorrectnessBasedOnFiles) {
+#else
+TEST(TransientDetectorTest, CorrectnessBasedOnFiles) {
+#endif
+  for (size_t i = 0; i < kNumberOfSampleRates; ++i) {
+    int sample_rate_hz = kSampleRatesHz[i];
+
+    // Prepare detect file.
+    rtc::StringBuilder detect_file_name;
+    detect_file_name << "audio_processing/transient/detect"
+                     << (sample_rate_hz / 1000) << "kHz";
+
+    FileWrapper detect_file = FileWrapper::OpenReadOnly(
+        test::ResourcePath(detect_file_name.str(), "dat"));
+
+    bool file_opened = detect_file.is_open();
+    ASSERT_TRUE(file_opened) << "File could not be opened.\n"
+                             << detect_file_name.str().c_str();
+
+    // Prepare audio file.
+    rtc::StringBuilder audio_file_name;
+    audio_file_name << "audio_processing/transient/audio"
+                    << (sample_rate_hz / 1000) << "kHz";
+
+    FileWrapper audio_file = FileWrapper::OpenReadOnly(
+        test::ResourcePath(audio_file_name.str(), "pcm"));
+
+    // Create detector.
+    TransientDetector detector(sample_rate_hz);
+
+    const size_t buffer_length = sample_rate_hz * ts::kChunkSizeMs / 1000;
+    std::unique_ptr<float[]> buffer(new float[buffer_length]);
+
+    const float kTolerance = 0.02f;
+
+    size_t frames_read = 0;
+
+    while (ReadInt16FromFileToFloatBuffer(&audio_file, buffer_length,
+                                          buffer.get()) == buffer_length) {
+      ++frames_read;
+
+      float detector_value =
+          detector.Detect(buffer.get(), buffer_length, NULL, 0);
+      double file_value;
+      ASSERT_EQ(1u, ReadDoubleBufferFromFile(&detect_file, 1, &file_value))
+          << "Detect test file is malformed.\n";
+
+      // Compare results with data from the matlab test file.
+      EXPECT_NEAR(file_value, detector_value, kTolerance)
+          << "Frame: " << frames_read;
+    }
+
+    detect_file.Close();
+    audio_file.Close();
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppression_test.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppression_test.cc
new file mode 100644
index 0000000000..2d8baf9416
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppression_test.cc
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc/agc.h"
+#include "modules/audio_processing/transient/transient_suppressor.h"
+#include "modules/audio_processing/transient/transient_suppressor_impl.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+ABSL_FLAG(std::string, in_file_name, "", "PCM file that contains the signal.");
+ABSL_FLAG(std::string,
+          detection_file_name,
+          "",
+          "PCM file that contains the detection signal.");
+ABSL_FLAG(std::string,
+          reference_file_name,
+          "",
+          "PCM file that contains the reference signal.");
+
+ABSL_FLAG(int,
+          chunk_size_ms,
+          10,
+          "Time between each chunk of samples in milliseconds.");
+
+ABSL_FLAG(int,
+          sample_rate_hz,
+          16000,
+          "Sampling frequency of the signal in Hertz.");
+ABSL_FLAG(int,
+          detection_rate_hz,
+          0,
+          "Sampling frequency of the detection signal in Hertz.");
+
+ABSL_FLAG(int, num_channels, 1, "Number of channels.");
+
+namespace webrtc {
+
+const char kUsage[] =
+    "\nDetects and suppresses transients from file.\n\n"
+    "This application loads the signal from the in_file_name with a specific\n"
+    "num_channels and sample_rate_hz, the detection signal from the\n"
+    "detection_file_name with a specific detection_rate_hz, and the reference\n"
+    "signal from the reference_file_name with sample_rate_hz, divides them\n"
+    "into chunk_size_ms blocks, computes its voice value and depending on the\n"
+    "voice_threshold does the respective restoration. You can always get the\n"
+    "all-voiced or all-unvoiced cases by setting the voice_threshold to 0 or\n"
+    "1 respectively.\n\n";
+
+// Read next buffers from the test files (signed 16-bit host-endian PCM
+// format). audio_buffer has int16 samples, detection_buffer has float samples
+// with range [-32768,32767], and reference_buffer has float samples with range
+// [-1,1]. Return true iff all the buffers were filled completely.
+bool ReadBuffers(FILE* in_file,
+                 size_t audio_buffer_size,
+                 int num_channels,
+                 int16_t* audio_buffer,
+                 FILE* detection_file,
+                 size_t detection_buffer_size,
+                 float* detection_buffer,
+                 FILE* reference_file,
+                 float* reference_buffer) {
+  std::unique_ptr<int16_t[]> tmpbuf;
+  int16_t* read_ptr = audio_buffer;
+  if (num_channels > 1) {
+    tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
+    read_ptr = tmpbuf.get();
+  }
+  if (fread(read_ptr, sizeof(*read_ptr), num_channels * audio_buffer_size,
+            in_file) != num_channels * audio_buffer_size) {
+    return false;
+  }
+  // De-interleave.
+  if (num_channels > 1) {
+    for (int i = 0; i < num_channels; ++i) {
+      for (size_t j = 0; j < audio_buffer_size; ++j) {
+        audio_buffer[i * audio_buffer_size + j] =
+            read_ptr[i + j * num_channels];
+      }
+    }
+  }
+  if (detection_file) {
+    std::unique_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
+    if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
+              detection_file) != detection_buffer_size)
+      return false;
+    for (size_t i = 0; i < detection_buffer_size; ++i)
+      detection_buffer[i] = ibuf[i];
+  }
+  if (reference_file) {
+    std::unique_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
+    if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file) !=
+        audio_buffer_size)
+      return false;
+    S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
+  }
+  return true;
+}
+
+// Write a number of samples to an open signed 16-bit host-endian PCM file.
+static void WritePCM(FILE* f,
+                     size_t num_samples,
+                     int num_channels,
+                     const float* buffer) {
+  std::unique_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
+  // Interleave.
+  for (int i = 0; i < num_channels; ++i) {
+    for (size_t j = 0; j < num_samples; ++j) {
+      ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
+    }
+  }
+  fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
+}
+
+// This application tests the transient suppression by providing a processed
+// PCM file, which has to be listened to in order to evaluate the
+// performance.
+// It gets an audio file and its voice gain information, and the suppressor
+// processes it, writing the output file "suppressed_keystrokes.pcm".
+void void_main() {
+  // TODO(aluebs): Remove all FileWrappers.
+  // Prepare the input file.
+  FILE* in_file = fopen(absl::GetFlag(FLAGS_in_file_name).c_str(), "rb");
+  ASSERT_TRUE(in_file != NULL);
+
+  // Prepare the detection file.
+  FILE* detection_file = NULL;
+  if (!absl::GetFlag(FLAGS_detection_file_name).empty()) {
+    detection_file =
+        fopen(absl::GetFlag(FLAGS_detection_file_name).c_str(), "rb");
+  }
+
+  // Prepare the reference file.
+  FILE* reference_file = NULL;
+  if (!absl::GetFlag(FLAGS_reference_file_name).empty()) {
+    reference_file =
+        fopen(absl::GetFlag(FLAGS_reference_file_name).c_str(), "rb");
+  }
+
+  // Prepare the output file.
+  std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
+  FILE* out_file = fopen(out_file_name.c_str(), "wb");
+  ASSERT_TRUE(out_file != NULL);
+
+  int detection_rate_hz = absl::GetFlag(FLAGS_detection_rate_hz);
+  if (detection_rate_hz == 0) {
+    detection_rate_hz = absl::GetFlag(FLAGS_sample_rate_hz);
+  }
+
+  Agc agc;
+
+  TransientSuppressorImpl suppressor(TransientSuppressor::VadMode::kDefault,
+                                     absl::GetFlag(FLAGS_sample_rate_hz),
+                                     detection_rate_hz,
+                                     absl::GetFlag(FLAGS_num_channels));
+
+  const size_t audio_buffer_size = absl::GetFlag(FLAGS_chunk_size_ms) *
+                                   absl::GetFlag(FLAGS_sample_rate_hz) / 1000;
+  const size_t detection_buffer_size =
+      absl::GetFlag(FLAGS_chunk_size_ms) * detection_rate_hz / 1000;
+
+  // int16 and float variants of the same data.
+  std::unique_ptr<int16_t[]> audio_buffer_i(
+      new int16_t[absl::GetFlag(FLAGS_num_channels) * audio_buffer_size]);
+  std::unique_ptr<float[]> audio_buffer_f(
+      new float[absl::GetFlag(FLAGS_num_channels) * audio_buffer_size]);
+
+  std::unique_ptr<float[]> detection_buffer, reference_buffer;
+
+  if (detection_file)
+    detection_buffer.reset(new float[detection_buffer_size]);
+  if (reference_file)
+    reference_buffer.reset(new float[audio_buffer_size]);
+
+  while (ReadBuffers(
+      in_file, audio_buffer_size, absl::GetFlag(FLAGS_num_channels),
+      audio_buffer_i.get(), detection_file, detection_buffer_size,
+      detection_buffer.get(), reference_file, reference_buffer.get())) {
+    agc.Process({audio_buffer_i.get(), audio_buffer_size});
+
+    for (size_t i = 0;
+         i < absl::GetFlag(FLAGS_num_channels) * audio_buffer_size; ++i) {
+      audio_buffer_f[i] = audio_buffer_i[i];
+    }
+
+    suppressor.Suppress(audio_buffer_f.get(), audio_buffer_size,
+                        absl::GetFlag(FLAGS_num_channels),
+                        detection_buffer.get(), detection_buffer_size,
+                        reference_buffer.get(), audio_buffer_size,
+                        agc.voice_probability(), true);
+
+    // Write result to out file.
+    WritePCM(out_file, audio_buffer_size, absl::GetFlag(FLAGS_num_channels),
+             audio_buffer_f.get());
+  }
+
+  fclose(in_file);
+  if (detection_file) {
+    fclose(detection_file);
+  }
+  if (reference_file) {
+    fclose(reference_file);
+  }
+  fclose(out_file);
+}
+
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  std::vector<char*> args = absl::ParseCommandLine(argc, argv);
+  if (args.size() != 1) {
+    printf("%s", webrtc::kUsage);
+    return 1;
+  }
+  RTC_CHECK_GT(absl::GetFlag(FLAGS_chunk_size_ms), 0);
+  RTC_CHECK_GT(absl::GetFlag(FLAGS_sample_rate_hz), 0);
+  RTC_CHECK_GT(absl::GetFlag(FLAGS_num_channels), 0);
+
+  webrtc::void_main();
+  return 0;
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor.h b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor.h
new file mode 100644
index 0000000000..ecb3c3baab
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+
+#include <stddef.h>
+
+namespace webrtc {
+
+// Detects transients in an audio stream and suppresses them using a simple
+// restoration algorithm that attenuates unexpected spikes in the spectrum.
+class TransientSuppressor {
+ public:
+  // Type of VAD used by the caller to compute the `voice_probability`
+  // argument of `Suppress()`.
+  enum class VadMode {
+    // By default, `TransientSuppressor` assumes that `voice_probability` is
+    // computed by `AgcManagerDirect`.
+    kDefault = 0,
+    // Use this mode when `TransientSuppressor` must assume that
+    // `voice_probability` is computed by the RNN VAD.
+    kRnnVad,
+    // Use this mode to let `TransientSuppressor::Suppress()` ignore
+    // `voice_probability` and behave as if voice information is unavailable
+    // (regardless of the passed value).
+    kNoVad,
+  };
+
+  virtual ~TransientSuppressor() {}
+
+  virtual void Initialize(int sample_rate_hz,
+                          int detector_rate_hz,
+                          int num_channels) = 0;
+
+  // Processes a `data` chunk and returns it with keystrokes suppressed from
+  // it. The float format is assumed to be int16 ranged. If there is more than
+  // one channel, the chunks are concatenated one after the other in `data`.
+  // `data_length` must be equal to `data_length_`.
+  // `num_channels` must be equal to `num_channels_`.
+  // A sub-band, ideally the higher, can be used as `detection_data`. If it is
+  // NULL, `data` is used for the detection too. The `detection_data` is always
+  // assumed mono.
+  // If a reference signal (e.g., keyboard microphone) is available, it can be
+  // passed in as `reference_data`. It is assumed mono and must have the same
+  // length as `data`. NULL is accepted if unavailable.
+  // This suppressor performs better if voice information is available.
+  // `voice_probability` is the probability of voice being present in this
+  // chunk of audio. If voice information is not available,
+  // `voice_probability` must always be set to 1.
+  // `key_pressed` determines if a key was pressed on this audio chunk.
+  // Returns a delayed version of `voice_probability` according to the
+  // algorithmic delay introduced by this method. In this way, the modified
+  // `data` and the returned voice probability will be temporally aligned.
+  virtual float Suppress(float* data,
+                         size_t data_length,
+                         int num_channels,
+                         const float* detection_data,
+                         size_t detection_length,
+                         const float* reference_data,
+                         size_t reference_length,
+                         float voice_probability,
+                         bool key_pressed) = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_api_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_api_gn/moz.build
new file mode 100644
index 0000000000..5988e89a6a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_api_gn/moz.build
@@ -0,0 +1,201 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] 
= True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "mips32":
+
+    DEFINES["MIPS32_LE"] = True
+    DEFINES["MIPS_FPU_LE"] = True
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "mips64":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0"
+
+if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_X11"] = "1"
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android":
+
+    OS_LIBS += [
+        "android_support",
+        "unwind"
+    ]
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android":
+
+    OS_LIBS += [
+        "android_support"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+Library("transient_suppressor_api_gn")
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc
new file mode 100644
index 0000000000..90428464e3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/transient_suppressor_impl.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+#include <complex>
+#include <deque>
+#include <limits>
+#include <set>
+#include <string>
+
+#include "common_audio/include/audio_util.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
+#include "modules/audio_processing/transient/common.h"
+#include "modules/audio_processing/transient/transient_detector.h"
+#include "modules/audio_processing/transient/transient_suppressor.h"
+#include "modules/audio_processing/transient/windows_private.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+static const float kMeanIIRCoefficient = 0.5f;
+
+// TODO(aluebs): Check if these values work also for 48kHz.
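// Illustrative conversion, assuming the 16 kHz configuration used in
// Initialize() below (analysis_length_ = 256): each bin spans
// 16000 / 256 = 62.5 Hz, so [kMinVoiceBin, kMaxVoiceBin] = [3, 60] covers
// roughly 187.5 Hz to 3750 Hz, bracketing the voice band mentioned in
// SoftRestoration().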
+static const size_t kMinVoiceBin = 3; +static const size_t kMaxVoiceBin = 60; + +namespace { + +float ComplexMagnitude(float a, float b) { + return std::abs(a) + std::abs(b); +} + +std::string GetVadModeLabel(TransientSuppressor::VadMode vad_mode) { + switch (vad_mode) { + case TransientSuppressor::VadMode::kDefault: + return "default"; + case TransientSuppressor::VadMode::kRnnVad: + return "RNN VAD"; + case TransientSuppressor::VadMode::kNoVad: + return "no VAD"; + } +} + +} // namespace + +TransientSuppressorImpl::TransientSuppressorImpl(VadMode vad_mode, + int sample_rate_hz, + int detector_rate_hz, + int num_channels) + : vad_mode_(vad_mode), + voice_probability_delay_unit_(/*delay_num_samples=*/0, sample_rate_hz), + analyzed_audio_is_silent_(false), + data_length_(0), + detection_length_(0), + analysis_length_(0), + buffer_delay_(0), + complex_analysis_length_(0), + num_channels_(0), + window_(NULL), + detector_smoothed_(0.f), + keypress_counter_(0), + chunks_since_keypress_(0), + detection_enabled_(false), + suppression_enabled_(false), + use_hard_restoration_(false), + chunks_since_voice_change_(0), + seed_(182), + using_reference_(false) { + RTC_LOG(LS_INFO) << "VAD mode: " << GetVadModeLabel(vad_mode_); + Initialize(sample_rate_hz, detector_rate_hz, num_channels); +} + +TransientSuppressorImpl::~TransientSuppressorImpl() {} + +void TransientSuppressorImpl::Initialize(int sample_rate_hz, + int detection_rate_hz, + int num_channels) { + RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz || + sample_rate_hz == ts::kSampleRate16kHz || + sample_rate_hz == ts::kSampleRate32kHz || + sample_rate_hz == ts::kSampleRate48kHz); + RTC_DCHECK(detection_rate_hz == ts::kSampleRate8kHz || + detection_rate_hz == ts::kSampleRate16kHz || + detection_rate_hz == ts::kSampleRate32kHz || + detection_rate_hz == ts::kSampleRate48kHz); + RTC_DCHECK_GT(num_channels, 0); + + switch (sample_rate_hz) { + case ts::kSampleRate8kHz: + analysis_length_ = 128u; + window_ = kBlocks80w128; + break; + case ts::kSampleRate16kHz: + analysis_length_ = 256u; + window_ = kBlocks160w256; + break; + case ts::kSampleRate32kHz: + analysis_length_ = 512u; + window_ = kBlocks320w512; + break; + case ts::kSampleRate48kHz: + analysis_length_ = 1024u; + window_ = kBlocks480w1024; + break; + default: + RTC_DCHECK_NOTREACHED(); + return; + } + + detector_.reset(new TransientDetector(detection_rate_hz)); + data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000; + RTC_DCHECK_LE(data_length_, analysis_length_); + buffer_delay_ = analysis_length_ - data_length_; + + voice_probability_delay_unit_.Initialize(/*delay_num_samples=*/buffer_delay_, + sample_rate_hz); + + complex_analysis_length_ = analysis_length_ / 2 + 1; + RTC_DCHECK_GE(complex_analysis_length_, kMaxVoiceBin); + num_channels_ = num_channels; + in_buffer_.reset(new float[analysis_length_ * num_channels_]); + memset(in_buffer_.get(), 0, + analysis_length_ * num_channels_ * sizeof(in_buffer_[0])); + detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000; + detection_buffer_.reset(new float[detection_length_]); + memset(detection_buffer_.get(), 0, + detection_length_ * sizeof(detection_buffer_[0])); + out_buffer_.reset(new float[analysis_length_ * num_channels_]); + memset(out_buffer_.get(), 0, + analysis_length_ * num_channels_ * sizeof(out_buffer_[0])); + // ip[0] must be zero to trigger initialization using rdft(). 
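  // Sizing note, based on the Ooura fft4g documentation rather than anything
  // stated in this file: the bit-reversal work area needs at least
  // 2 + sqrt(n/2) entries, so the 2 + sqrt(n) allocation below
  // over-provisions slightly, and the inverse transform must be scaled by
  // 2/n, which `fft_scaling` in Suppress() applies.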
+  size_t ip_length = 2 + sqrtf(analysis_length_);
+  ip_.reset(new size_t[ip_length]());
+  memset(ip_.get(), 0, ip_length * sizeof(ip_[0]));
+  wfft_.reset(new float[complex_analysis_length_ - 1]);
+  memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0]));
+  spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]);
+  memset(spectral_mean_.get(), 0,
+         complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0]));
+  fft_buffer_.reset(new float[analysis_length_ + 2]);
+  memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0]));
+  magnitudes_.reset(new float[complex_analysis_length_]);
+  memset(magnitudes_.get(), 0,
+         complex_analysis_length_ * sizeof(magnitudes_[0]));
+  mean_factor_.reset(new float[complex_analysis_length_]);
+
+  static const float kFactorHeight = 10.f;
+  static const float kLowSlope = 1.f;
+  static const float kHighSlope = 0.3f;
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    mean_factor_[i] =
+        kFactorHeight /
+            (1.f + std::exp(kLowSlope * static_cast<int>(i - kMinVoiceBin))) +
+        kFactorHeight /
+            (1.f + std::exp(kHighSlope * static_cast<int>(kMaxVoiceBin - i)));
+  }
+  detector_smoothed_ = 0.f;
+  keypress_counter_ = 0;
+  chunks_since_keypress_ = 0;
+  detection_enabled_ = false;
+  suppression_enabled_ = false;
+  use_hard_restoration_ = false;
+  chunks_since_voice_change_ = 0;
+  seed_ = 182;
+  using_reference_ = false;
+}
+
+float TransientSuppressorImpl::Suppress(float* data,
+                                        size_t data_length,
+                                        int num_channels,
+                                        const float* detection_data,
+                                        size_t detection_length,
+                                        const float* reference_data,
+                                        size_t reference_length,
+                                        float voice_probability,
+                                        bool key_pressed) {
+  if (!data || data_length != data_length_ || num_channels != num_channels_ ||
+      detection_length != detection_length_ || voice_probability < 0 ||
+      voice_probability > 1) {
+    // The audio is not modified, so the voice probability is returned as is
+    // (delay not applied).
+    return voice_probability;
+  }
+
+  UpdateKeypress(key_pressed);
+  UpdateBuffers(data);
+
+  if (detection_enabled_) {
+    UpdateRestoration(voice_probability);
+
+    if (!detection_data) {
+      // Use the input data of the first channel if special detection data is
+      // not supplied.
+      detection_data = &in_buffer_[buffer_delay_];
+    }
+
+    float detector_result = detector_->Detect(detection_data, detection_length,
+                                              reference_data, reference_length);
+    if (detector_result < 0) {
+      // The audio is not modified, so the voice probability is returned as is
+      // (delay not applied).
+      return voice_probability;
+    }
+
+    using_reference_ = detector_->using_reference();
+
+    // `detector_smoothed_` follows the `detector_result` when this last one is
+    // increasing, but has an exponential decaying tail to be able to suppress
+    // the ringing of keyclicks.
+    float smooth_factor = using_reference_ ? 0.6 : 0.1;
+    detector_smoothed_ = detector_result >= detector_smoothed_
+                             ? detector_result
+                             : smooth_factor * detector_smoothed_ +
+                                   (1 - smooth_factor) * detector_result;
+
+    for (int i = 0; i < num_channels_; ++i) {
+      Suppress(&in_buffer_[i * analysis_length_],
+               &spectral_mean_[i * complex_analysis_length_],
+               &out_buffer_[i * analysis_length_]);
+    }
+  }
+
+  // If the suppression isn't enabled, we use the in buffer to delay the signal
+  // appropriately. This also gives time for the out buffer to be refreshed
+  // with new data between detection and suppression getting enabled.
+  for (int i = 0; i < num_channels_; ++i) {
+    memcpy(&data[i * data_length_],
+           suppression_enabled_ ?
&out_buffer_[i * analysis_length_] + : &in_buffer_[i * analysis_length_], + data_length_ * sizeof(*data)); + } + + // The audio has been modified, return the delayed voice probability. + return voice_probability_delay_unit_.Delay(voice_probability); +} + +// This should only be called when detection is enabled. UpdateBuffers() must +// have been called. At return, `out_buffer_` will be filled with the +// processed output. +void TransientSuppressorImpl::Suppress(float* in_ptr, + float* spectral_mean, + float* out_ptr) { + // Go to frequency domain. + for (size_t i = 0; i < analysis_length_; ++i) { + // TODO(aluebs): Rename windows + fft_buffer_[i] = in_ptr[i] * window_[i]; + } + + WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get()); + + // Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end + // for convenience. + fft_buffer_[analysis_length_] = fft_buffer_[1]; + fft_buffer_[analysis_length_ + 1] = 0.f; + fft_buffer_[1] = 0.f; + + for (size_t i = 0; i < complex_analysis_length_; ++i) { + magnitudes_[i] = + ComplexMagnitude(fft_buffer_[i * 2], fft_buffer_[i * 2 + 1]); + } + // Restore audio if necessary. + if (suppression_enabled_) { + if (use_hard_restoration_) { + HardRestoration(spectral_mean); + } else { + SoftRestoration(spectral_mean); + } + } + + // Update the spectral mean. + for (size_t i = 0; i < complex_analysis_length_; ++i) { + spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] + + kMeanIIRCoefficient * magnitudes_[i]; + } + + // Back to time domain. + // Put R[n/2] back in fft_buffer_[1]. + fft_buffer_[1] = fft_buffer_[analysis_length_]; + + WebRtc_rdft(analysis_length_, -1, fft_buffer_.get(), ip_.get(), wfft_.get()); + const float fft_scaling = 2.f / analysis_length_; + + for (size_t i = 0; i < analysis_length_; ++i) { + out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling; + } +} + +void TransientSuppressorImpl::UpdateKeypress(bool key_pressed) { + const int kKeypressPenalty = 1000 / ts::kChunkSizeMs; + const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs; + const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs; // 4 seconds. + + if (key_pressed) { + keypress_counter_ += kKeypressPenalty; + chunks_since_keypress_ = 0; + detection_enabled_ = true; + } + keypress_counter_ = std::max(0, keypress_counter_ - 1); + + if (keypress_counter_ > kIsTypingThreshold) { + if (!suppression_enabled_) { + RTC_LOG(LS_INFO) << "[ts] Transient suppression is now enabled."; + } + suppression_enabled_ = true; + keypress_counter_ = 0; + } + + if (detection_enabled_ && ++chunks_since_keypress_ > kChunksUntilNotTyping) { + if (suppression_enabled_) { + RTC_LOG(LS_INFO) << "[ts] Transient suppression is now disabled."; + } + detection_enabled_ = false; + suppression_enabled_ = false; + keypress_counter_ = 0; + } +} + +void TransientSuppressorImpl::UpdateRestoration(float voice_probability) { + bool not_voiced; + switch (vad_mode_) { + case TransientSuppressor::VadMode::kDefault: { + constexpr float kVoiceThreshold = 0.02f; + not_voiced = voice_probability < kVoiceThreshold; + break; + } + case TransientSuppressor::VadMode::kRnnVad: { + constexpr float kVoiceThreshold = 0.7f; + not_voiced = voice_probability < kVoiceThreshold; + break; + } + case TransientSuppressor::VadMode::kNoVad: + // Always assume that voice is detected. 
+      not_voiced = false;
+      break;
+  }
+
+  if (not_voiced == use_hard_restoration_) {
+    chunks_since_voice_change_ = 0;
+  } else {
+    ++chunks_since_voice_change_;
+
+    // Number of 10 ms frames to wait to transition to and from hard
+    // restoration.
+    constexpr int kHardRestorationOffsetDelay = 3;
+    constexpr int kHardRestorationOnsetDelay = 80;
+
+    if ((use_hard_restoration_ &&
+         chunks_since_voice_change_ > kHardRestorationOffsetDelay) ||
+        (!use_hard_restoration_ &&
+         chunks_since_voice_change_ > kHardRestorationOnsetDelay)) {
+      use_hard_restoration_ = not_voiced;
+      chunks_since_voice_change_ = 0;
+    }
+  }
+}
+
+// Shift buffers to make way for new data. Must be called after
+// `detection_enabled_` is updated by UpdateKeypress().
+void TransientSuppressorImpl::UpdateBuffers(float* data) {
+  // TODO(aluebs): Change to ring buffer.
+  memmove(in_buffer_.get(), &in_buffer_[data_length_],
+          (buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
+              sizeof(in_buffer_[0]));
+  // Copy new chunk to buffer.
+  for (int i = 0; i < num_channels_; ++i) {
+    memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_],
+           &data[i * data_length_], data_length_ * sizeof(*data));
+  }
+  if (detection_enabled_) {
+    // Shift previous chunk in out buffer.
+    memmove(out_buffer_.get(), &out_buffer_[data_length_],
+            (buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
+                sizeof(out_buffer_[0]));
+    // Initialize new chunk in out buffer.
+    for (int i = 0; i < num_channels_; ++i) {
+      memset(&out_buffer_[buffer_delay_ + i * analysis_length_], 0,
+             data_length_ * sizeof(out_buffer_[0]));
+    }
+  }
+}
+
+// Restores the unvoiced signal if a click is present.
+// Attenuates by a certain factor every peak in the `fft_buffer_` that exceeds
+// the spectral mean. The attenuation depends on `detector_smoothed_`.
+// If a restoration takes place, the `magnitudes_` are updated to the new
+// value.
+void TransientSuppressorImpl::HardRestoration(float* spectral_mean) {
+  const float detector_result =
+      1.f - std::pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f);
+  // To restore, we get the peaks in the spectrum. If higher than the previous
+  // spectral mean we adjust them.
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) {
+      // RandU() generates values on [0, int16::max()]
+      const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) /
+                          std::numeric_limits<int16_t>::max();
+      const float scaled_mean = detector_result * spectral_mean[i];
+
+      fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] +
+                           scaled_mean * cosf(phase);
+      fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] +
+                               scaled_mean * sinf(phase);
+      magnitudes_[i] = magnitudes_[i] -
+                       detector_result * (magnitudes_[i] - spectral_mean[i]);
+    }
+  }
+}
+
+// Restores the voiced signal if a click is present.
+// Attenuates by a certain factor every peak in the `fft_buffer_` that exceeds
+// the spectral mean and that is lower than some function of the current block
+// frequency mean. The attenuation depends on `detector_smoothed_`.
+// If a restoration takes place, the `magnitudes_` are updated to the new
+// value.
+void TransientSuppressorImpl::SoftRestoration(float* spectral_mean) {
+  // Get the spectral magnitude mean of the current block.
+  float block_frequency_mean = 0;
+  for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) {
+    block_frequency_mean += magnitudes_[i];
+  }
+  block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin);
+
+  // To restore, we get the peaks in the spectrum. If higher than the
+  // previous spectral mean and lower than a factor of the block mean
+  // we adjust them. The factor is a double sigmoid that has a minimum in the
+  // voice frequency range (300Hz - 3kHz).
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 &&
+        (using_reference_ ||
+         magnitudes_[i] < block_frequency_mean * mean_factor_[i])) {
+      const float new_magnitude =
+          magnitudes_[i] -
+          detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]);
+      const float magnitude_ratio = new_magnitude / magnitudes_[i];
+
+      fft_buffer_[i * 2] *= magnitude_ratio;
+      fft_buffer_[i * 2 + 1] *= magnitude_ratio;
+      magnitudes_[i] = new_magnitude;
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.h b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.h
new file mode 100644
index 0000000000..4005a16b0a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_IMPL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+
+#include "modules/audio_processing/transient/transient_suppressor.h"
+#include "modules/audio_processing/transient/voice_probability_delay_unit.h"
+#include "rtc_base/gtest_prod_util.h"
+
+namespace webrtc {
+
+class TransientDetector;
+
+// Detects transients in an audio stream and suppresses them using a simple
+// restoration algorithm that attenuates unexpected spikes in the spectrum.
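// A minimal usage sketch, assuming 10 ms mono chunks at 16 kHz and an
// AGC-style voice probability (illustrative only, not part of the header):
//
//   TransientSuppressorImpl ts(TransientSuppressor::VadMode::kDefault,
//                              /*sample_rate_hz=*/16000,
//                              /*detector_rate_hz=*/16000,
//                              /*num_channels=*/1);
//   float chunk[160];  // One 10 ms chunk of int16-ranged float samples.
//   float delayed_probability =
//       ts.Suppress(chunk, /*data_length=*/160, /*num_channels=*/1,
//                   /*detection_data=*/nullptr, /*detection_length=*/160,
//                   /*reference_data=*/nullptr, /*reference_length=*/0,
//                   /*voice_probability=*/1.f, /*key_pressed=*/false);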
+class TransientSuppressorImpl : public TransientSuppressor {
+ public:
+  TransientSuppressorImpl(VadMode vad_mode,
+                          int sample_rate_hz,
+                          int detector_rate_hz,
+                          int num_channels);
+  ~TransientSuppressorImpl() override;
+
+  void Initialize(int sample_rate_hz,
+                  int detector_rate_hz,
+                  int num_channels) override;
+
+  float Suppress(float* data,
+                 size_t data_length,
+                 int num_channels,
+                 const float* detection_data,
+                 size_t detection_length,
+                 const float* reference_data,
+                 size_t reference_length,
+                 float voice_probability,
+                 bool key_pressed) override;
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(TransientSuppressorVadModeParametrization,
+                           TypingDetectionLogicWorksAsExpectedForMono);
+  void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
+
+  void UpdateKeypress(bool key_pressed);
+  void UpdateRestoration(float voice_probability);
+
+  void UpdateBuffers(float* data);
+
+  void HardRestoration(float* spectral_mean);
+  void SoftRestoration(float* spectral_mean);
+
+  const VadMode vad_mode_;
+  VoiceProbabilityDelayUnit voice_probability_delay_unit_;
+
+  std::unique_ptr<TransientDetector> detector_;
+
+  bool analyzed_audio_is_silent_;
+
+  size_t data_length_;
+  size_t detection_length_;
+  size_t analysis_length_;
+  size_t buffer_delay_;
+  size_t complex_analysis_length_;
+  int num_channels_;
+  // Input buffer where the original samples are stored.
+  std::unique_ptr<float[]> in_buffer_;
+  std::unique_ptr<float[]> detection_buffer_;
+  // Output buffer where the restored samples are stored.
+  std::unique_ptr<float[]> out_buffer_;
+
+  // Arrays for fft.
+  std::unique_ptr<size_t[]> ip_;
+  std::unique_ptr<float[]> wfft_;
+
+  std::unique_ptr<float[]> spectral_mean_;
+
+  // Stores the data for the fft.
+  std::unique_ptr<float[]> fft_buffer_;
+
+  std::unique_ptr<float[]> magnitudes_;
+
+  const float* window_;
+
+  std::unique_ptr<float[]> mean_factor_;
+
+  float detector_smoothed_;
+
+  int keypress_counter_;
+  int chunks_since_keypress_;
+  bool detection_enabled_;
+  bool suppression_enabled_;
+
+  bool use_hard_restoration_;
+  int chunks_since_voice_change_;
+
+  uint32_t seed_;
+
+  bool using_reference_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_IMPL_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl_gn/moz.build
new file mode 100644
index 0000000000..ee6b82a2f6
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl_gn/moz.build
@@ -0,0 +1,236 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/transient/moving_moments.cc", + "/third_party/libwebrtc/modules/audio_processing/transient/transient_detector.cc", + "/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_impl.cc", + "/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc", + "/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = 
True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("transient_suppressor_impl_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc new file mode 100644 index 0000000000..ab48504af6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/transient/transient_suppressor.h"
+
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/transient/common.h"
+#include "modules/audio_processing/transient/transient_suppressor_impl.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+constexpr int kMono = 1;
+
+// Returns the index of the first non-zero sample in `samples`, or
+// `absl::nullopt` if all samples are zero.
+absl::optional<int> FindFirstNonZeroSample(const std::vector<float>& samples) {
+  for (size_t i = 0; i < samples.size(); ++i) {
+    if (samples[i] != 0.0f) {
+      return i;
+    }
+  }
+  return absl::nullopt;
+}
+
+}  // namespace
+
+class TransientSuppressorVadModeParametrization
+    : public ::testing::TestWithParam<TransientSuppressor::VadMode> {};
+
+TEST_P(TransientSuppressorVadModeParametrization,
+       TypingDetectionLogicWorksAsExpectedForMono) {
+  TransientSuppressorImpl ts(GetParam(), ts::kSampleRate16kHz,
+                             ts::kSampleRate16kHz, kMono);
+
+  // Each key-press enables detection.
+  EXPECT_FALSE(ts.detection_enabled_);
+  ts.UpdateKeypress(true);
+  EXPECT_TRUE(ts.detection_enabled_);
+
+  // It takes four seconds without any key-press to disable the detection.
+  for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_TRUE(ts.detection_enabled_);
+  }
+  ts.UpdateKeypress(false);
+  EXPECT_FALSE(ts.detection_enabled_);
+
+  // Key-presses that are more than a second apart from each other don't enable
+  // suppression.
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_FALSE(ts.suppression_enabled_);
+    ts.UpdateKeypress(true);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_FALSE(ts.suppression_enabled_);
+    for (int time_ms = 0; time_ms < 990; time_ms += ts::kChunkSizeMs) {
+      ts.UpdateKeypress(false);
+      EXPECT_TRUE(ts.detection_enabled_);
+      EXPECT_FALSE(ts.suppression_enabled_);
+    }
+    ts.UpdateKeypress(false);
+  }
+
+  // Two consecutive key-presses are enough to enable the suppression.
+  ts.UpdateKeypress(true);
+  EXPECT_FALSE(ts.suppression_enabled_);
+  ts.UpdateKeypress(true);
+  EXPECT_TRUE(ts.suppression_enabled_);
+
+  // Key-presses that are less than a second apart from each other don't
+  // disable detection or suppression.
+  for (int i = 0; i < 100; ++i) {
+    for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
+      ts.UpdateKeypress(false);
+      EXPECT_TRUE(ts.detection_enabled_);
+      EXPECT_TRUE(ts.suppression_enabled_);
+    }
+    ts.UpdateKeypress(true);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_TRUE(ts.suppression_enabled_);
+  }
+
+  // It takes four seconds without any key-press to disable the detection and
+  // suppression.
+  for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_TRUE(ts.suppression_enabled_);
+  }
+  for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_FALSE(ts.detection_enabled_);
+    EXPECT_FALSE(ts.suppression_enabled_);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    TransientSuppressorImplTest,
+    TransientSuppressorVadModeParametrization,
+    ::testing::Values(TransientSuppressor::VadMode::kDefault,
+                      TransientSuppressor::VadMode::kRnnVad,
+                      TransientSuppressor::VadMode::kNoVad));
+
+class TransientSuppressorSampleRateParametrization
+    : public ::testing::TestWithParam<int> {};
+
+// Checks that voice probability and processed audio data are temporally
+// aligned after `Suppress()` is called.
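+// The test measures the algorithmic delay of `TransientSuppressorImpl` by
+// feeding frames of constant non-zero samples and locating the first non-zero
+// output sample; the voice probability returned by `Suppress()` must lag by
+// the same amount.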
+TEST_P(TransientSuppressorSampleRateParametrization,
+       CheckAudioAndVoiceProbabilityTemporallyAligned) {
+  const int sample_rate_hz = GetParam();
+  TransientSuppressorImpl ts(TransientSuppressor::VadMode::kDefault,
+                             sample_rate_hz,
+                             /*detection_rate_hz=*/sample_rate_hz, kMono);
+
+  const int frame_size = sample_rate_hz * ts::kChunkSizeMs / 1000;
+  std::vector<float> frame(frame_size);
+
+  constexpr int kMaxAttempts = 3;
+  for (int i = 0; i < kMaxAttempts; ++i) {
+    SCOPED_TRACE(i);
+
+    // Call `Suppress()` on frames of non-zero audio samples.
+    std::fill(frame.begin(), frame.end(), 1000.0f);
+    float delayed_voice_probability = ts.Suppress(
+        frame.data(), frame.size(), kMono, /*detection_data=*/nullptr,
+        /*detection_length=*/frame_size, /*reference_data=*/nullptr,
+        /*reference_length=*/frame_size, /*voice_probability=*/1.0f,
+        /*key_pressed=*/false);
+
+    // Detect the algorithmic delay of `TransientSuppressorImpl`.
+    absl::optional<int> frame_delay = FindFirstNonZeroSample(frame);
+
+    // Check that the delayed voice probability is delayed according to the
+    // measured delay.
+    if (frame_delay.has_value()) {
+      if (*frame_delay == 0) {
+        // When the delay is an integer multiple of the frame duration,
+        // `Suppress()` returns a copy of a previously observed voice
+        // probability value.
+        EXPECT_EQ(delayed_voice_probability, 1.0f);
+      } else {
+        // Instead, when the delay is fractional, `Suppress()` returns an
+        // interpolated value. Since the exact value depends on the
+        // interpolation method, we only check that the delayed voice
+        // probability is not zero as it must converge towards the previously
+        // observed value.
+        EXPECT_GT(delayed_voice_probability, 0.0f);
+      }
+      break;
+    } else {
+      // The algorithmic delay is longer than the duration of a single frame.
+      // Until the delay is detected, the delayed voice probability is zero.
+      EXPECT_EQ(delayed_voice_probability, 0.0f);
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(TransientSuppressorImplTest,
+                         TransientSuppressorSampleRateParametrization,
+                         ::testing::Values(ts::kSampleRate8kHz,
+                                           ts::kSampleRate16kHz,
+                                           ts::kSampleRate32kHz,
+                                           ts::kSampleRate48kHz));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc
new file mode 100644
index 0000000000..27b2b42b38
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/voice_probability_delay_unit.h"
+
+#include <array>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+VoiceProbabilityDelayUnit::VoiceProbabilityDelayUnit(int delay_num_samples,
+                                                     int sample_rate_hz) {
+  Initialize(delay_num_samples, sample_rate_hz);
+}
+
+void VoiceProbabilityDelayUnit::Initialize(int delay_num_samples,
+                                           int sample_rate_hz) {
+  RTC_DCHECK_GE(delay_num_samples, 0);
+  RTC_DCHECK_LE(delay_num_samples, sample_rate_hz / 50)
+      << "The implementation does not support delays greater than 20 ms.";
+  int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);  // 10 ms.
+  if (delay_num_samples <= frame_size) {
+    weights_[0] = 0.0f;
+    weights_[1] = static_cast<float>(delay_num_samples) / frame_size;
+    weights_[2] =
+        static_cast<float>(frame_size - delay_num_samples) / frame_size;
+  } else {
+    delay_num_samples -= frame_size;
+    weights_[0] = static_cast<float>(delay_num_samples) / frame_size;
+    weights_[1] =
+        static_cast<float>(frame_size - delay_num_samples) / frame_size;
+    weights_[2] = 0.0f;
+  }
+
+  // Resets the delay unit.
+  last_probabilities_.fill(0.0f);
+}
+
+float VoiceProbabilityDelayUnit::Delay(float voice_probability) {
+  float weighted_probability = weights_[0] * last_probabilities_[0] +
+                               weights_[1] * last_probabilities_[1] +
+                               weights_[2] * voice_probability;
+  last_probabilities_[0] = last_probabilities_[1];
+  last_probabilities_[1] = voice_probability;
+  return weighted_probability;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.h b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.h
new file mode 100644
index 0000000000..05961663e3
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_VOICE_PROBABILITY_DELAY_UNIT_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_VOICE_PROBABILITY_DELAY_UNIT_H_
+
+#include <array>
+
+namespace webrtc {
+
+// Iteratively produces a sequence of delayed voice probability values given a
+// fixed delay between 0 and 20 ms and given a sequence of voice probability
+// values observed every 10 ms. Supports fractional delays, that is, delays
+// which are not an integer multiple of 10 ms. Applies interpolation with
+// fractional delays; otherwise, returns a previously observed value according
+// to the given fixed delay.
+class VoiceProbabilityDelayUnit {
+ public:
+  // Ctor. `delay_num_samples` is the delay in number of samples and it must
+  // be non-negative and correspond to at most 20 ms.
+  VoiceProbabilityDelayUnit(int delay_num_samples, int sample_rate_hz);
+
+  // Handles delay and sample rate changes and resets the delay unit.
+  void Initialize(int delay_num_samples, int sample_rate_hz);
+
+  // Observes `voice_probability` and returns a delayed voice probability.
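+  // The returned value is the weighted sum
+  // weights_[0] * p[n-2] + weights_[1] * p[n-1] + weights_[2] * p[n],
+  // where p[n] is the probability observed in the current call and the
+  // weights are derived from the delay in `Initialize()`.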
+  float Delay(float voice_probability);
+
+ private:
+  std::array<float, 3> weights_;
+  std::array<float, 2> last_probabilities_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_VOICE_PROBABILITY_DELAY_UNIT_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_gn/moz.build
new file mode 100644
index 0000000000..e2abcb8490
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
+    DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0"
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_AURA"] = "1"
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_NSS_CERTS"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_UDEV"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["USE_GLIB"] = "1"
+    DEFINES["USE_OZONE"] = "1"
+    DEFINES["USE_X11"] = "1"
+    DEFINES["WEBRTC_BSD"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_FILE_OFFSET_BITS"] = "64"
+    DEFINES["_LARGEFILE64_SOURCE"] = True
+    DEFINES["_LARGEFILE_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+if
CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("voice_probability_delay_unit_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_unittest.cc new file mode 100644 index 0000000000..04848e6f2c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/voice_probability_delay_unit_unittest.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/voice_probability_delay_unit.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+// Checks that with zero delay, the observed value is immediately returned as
+// the delayed value.
+TEST(VoiceProbabilityDelayUnit, NoDelay) {
+  VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/0,
+                                       /*sample_rate_hz=*/48000);
+  constexpr int kMax = 5;
+  for (int i = 0; i <= kMax; ++i) {
+    SCOPED_TRACE(i);
+    float voice_probability = static_cast<float>(i) / kMax;
+    EXPECT_EQ(voice_probability, delay_unit.Delay(voice_probability));
+  }
+}
+
+// Checks that with integer delays, an exact copy of a previously observed
+// value is returned.
+TEST(VoiceProbabilityDelayUnit, IntegerDelay) {
+  VoiceProbabilityDelayUnit delay_unit_10ms(/*delay_num_samples=*/480,
+                                            /*sample_rate_hz=*/48000);
+  delay_unit_10ms.Delay(0.125f);
+  EXPECT_EQ(0.125f, delay_unit_10ms.Delay(0.9f));
+
+  VoiceProbabilityDelayUnit delay_unit_20ms(/*delay_num_samples=*/960,
+                                            /*sample_rate_hz=*/48000);
+  delay_unit_20ms.Delay(0.125f);
+  delay_unit_20ms.Delay(0.8f);
+  EXPECT_EQ(0.125f, delay_unit_20ms.Delay(0.9f));
+}
+
+// Checks that with a fractional delay < 10 ms, interpolation is applied.
+TEST(VoiceProbabilityDelayUnit, FractionalDelayLessThan10ms) {
+  // Create delay unit with fractional delay of 6 ms.
+  VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/288,
+                                       /*sample_rate_hz=*/48000);
+  // frame 0
+  // --------- frame 1
+  // ---------
+  // 0000001111
+  delay_unit.Delay(1.0f);
+  EXPECT_FLOAT_EQ(0.68f, delay_unit.Delay(0.2f));
+}
+
+// Checks that with a fractional delay > 10 ms, interpolation is applied.
+TEST(VoiceProbabilityDelayUnit, FractionalDelayGreaterThan10ms) {
+  // Create delay unit with fractional delay of 14 ms.
+  VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/672,
+                                       /*sample_rate_hz=*/48000);
+  // frame 0
+  // --------- frame 1
+  // --------- frame 2
+  // ---------
+  // 0000111111
+  delay_unit.Delay(1.0f);
+  delay_unit.Delay(0.2f);
+  EXPECT_FLOAT_EQ(0.52f, delay_unit.Delay(1.0f));
+}
+
+// Checks that `Initialize()` resets the delay unit.
+TEST(VoiceProbabilityDelayUnit, InitializeResetsDelayUnit) {
+  VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/960,
+                                       /*sample_rate_hz=*/48000);
+  delay_unit.Delay(1.0f);
+  delay_unit.Delay(0.9f);
+
+  delay_unit.Initialize(/*delay_num_samples=*/160, /*sample_rate_hz=*/8000);
+  EXPECT_EQ(0.0f, delay_unit.Delay(0.1f));
+  EXPECT_EQ(0.0f, delay_unit.Delay(0.2f));
+  EXPECT_EQ(0.1f, delay_unit.Delay(0.3f));
+}
+
+// Checks that `Initialize()` handles delay changes.
+TEST(VoiceProbabilityDelayUnit, InitializeHandlesDelayChanges) {
+  // Start with a 20 ms delay.
+  VoiceProbabilityDelayUnit delay_unit(/*delay_num_samples=*/960,
+                                       /*sample_rate_hz=*/48000);
+  delay_unit.Delay(1.0f);
+  delay_unit.Delay(0.9f);
+
+  // Lower the delay to 10 ms.
+  delay_unit.Initialize(/*delay_num_samples=*/80, /*sample_rate_hz=*/8000);
+  EXPECT_EQ(0.0f, delay_unit.Delay(0.1f));
+  EXPECT_EQ(0.1f, delay_unit.Delay(0.2f));
+
+  // Increase the delay to 15 ms.
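+  // 120 samples at 8 kHz are 15 ms, i.e., one 10 ms frame (80 samples) plus
+  // 40 samples, so `Initialize()` sets the weights to {40/80, 40/80, 0} =
+  // {0.5, 0.5, 0.0} and each output averages the two oldest of the last three
+  // observations: 0.5 * 0.0 + 0.5 * 0.1 = 0.05 for the second call below.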
+ delay_unit.Initialize(/*delay_num_samples=*/120, /*sample_rate_hz=*/8000); + EXPECT_EQ(0.0f, delay_unit.Delay(0.1f)); + EXPECT_EQ(0.05f, delay_unit.Delay(0.2f)); + EXPECT_EQ(0.15f, delay_unit.Delay(0.3f)); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/transient/windows_private.h b/third_party/libwebrtc/modules/audio_processing/transient/windows_private.h new file mode 100644 index 0000000000..54e3c25785 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/transient/windows_private.h @@ -0,0 +1,557 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_ +#define MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_ + +namespace webrtc { + +// Hanning window for 4ms 16kHz +static const float kHanning64w128[128] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f, + 0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f, + 0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f, + 0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f, + 0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f, + 0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f, + 0.83146961230255f, 0.81758481315158f, 0.80320753148064f, 0.78834642762661f, + 0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f, + 0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 0.65317284295378f, + 0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f, + 0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f, + 0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f, + 0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f, + 0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 
0.21910124015687f, + 0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f, + 0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f}; + +// hybrib Hanning & flat window +static const float kBlocks80w128[128] = { + 0.00000000f, 0.03271908f, 0.06540313f, 0.09801714f, 0.13052619f, + 0.16289547f, 0.19509032f, 0.22707626f, 0.25881905f, 0.29028468f, + 0.32143947f, 0.35225005f, 0.38268343f, 0.41270703f, 0.44228869f, + 0.47139674f, 0.50000000f, 0.52806785f, 0.55557023f, 0.58247770f, + 0.60876143f, 0.63439328f, 0.65934582f, 0.68359230f, 0.70710678f, + 0.72986407f, 0.75183981f, 0.77301045f, 0.79335334f, 0.81284668f, + 0.83146961f, 0.84920218f, 0.86602540f, 0.88192126f, 0.89687274f, + 0.91086382f, 0.92387953f, 0.93590593f, 0.94693013f, 0.95694034f, + 0.96592583f, 0.97387698f, 0.98078528f, 0.98664333f, 0.99144486f, + 0.99518473f, 0.99785892f, 0.99946459f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 0.99946459f, 0.99785892f, 0.99518473f, 0.99144486f, + 0.98664333f, 0.98078528f, 0.97387698f, 0.96592583f, 0.95694034f, + 0.94693013f, 0.93590593f, 0.92387953f, 0.91086382f, 0.89687274f, + 0.88192126f, 0.86602540f, 0.84920218f, 0.83146961f, 0.81284668f, + 0.79335334f, 0.77301045f, 0.75183981f, 0.72986407f, 0.70710678f, + 0.68359230f, 0.65934582f, 0.63439328f, 0.60876143f, 0.58247770f, + 0.55557023f, 0.52806785f, 0.50000000f, 0.47139674f, 0.44228869f, + 0.41270703f, 0.38268343f, 0.35225005f, 0.32143947f, 0.29028468f, + 0.25881905f, 0.22707626f, 0.19509032f, 0.16289547f, 0.13052619f, + 0.09801714f, 0.06540313f, 0.03271908f}; + +// hybrib Hanning & flat window +static const float kBlocks160w256[256] = { + 0.00000000f, 0.01636173f, 0.03271908f, 0.04906767f, 0.06540313f, + 0.08172107f, 0.09801714f, 0.11428696f, 0.13052619f, 0.14673047f, + 0.16289547f, 0.17901686f, 0.19509032f, 0.21111155f, 0.22707626f, + 0.24298018f, 0.25881905f, 0.27458862f, 0.29028468f, 0.30590302f, + 0.32143947f, 0.33688985f, 0.35225005f, 0.36751594f, 0.38268343f, + 0.39774847f, 0.41270703f, 0.42755509f, 0.44228869f, 0.45690388f, + 0.47139674f, 0.48576339f, 0.50000000f, 0.51410274f, 0.52806785f, + 0.54189158f, 0.55557023f, 0.56910015f, 0.58247770f, 0.59569930f, + 0.60876143f, 0.62166057f, 0.63439328f, 0.64695615f, 0.65934582f, + 0.67155895f, 0.68359230f, 0.69544264f, 0.70710678f, 0.71858162f, + 0.72986407f, 0.74095113f, 0.75183981f, 0.76252720f, 0.77301045f, + 0.78328675f, 0.79335334f, 0.80320753f, 0.81284668f, 0.82226822f, + 0.83146961f, 0.84044840f, 0.84920218f, 0.85772861f, 0.86602540f, + 0.87409034f, 0.88192126f, 0.88951608f, 0.89687274f, 0.90398929f, + 0.91086382f, 0.91749450f, 0.92387953f, 0.93001722f, 0.93590593f, + 0.94154407f, 0.94693013f, 0.95206268f, 0.95694034f, 0.96156180f, + 0.96592583f, 0.97003125f, 0.97387698f, 0.97746197f, 0.98078528f, + 0.98384601f, 0.98664333f, 0.98917651f, 0.99144486f, 0.99344778f, + 0.99518473f, 0.99665524f, 0.99785892f, 0.99879546f, 0.99946459f, + 0.99986614f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 
1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 0.99986614f, 0.99946459f, 0.99879546f, 0.99785892f, + 0.99665524f, 0.99518473f, 0.99344778f, 0.99144486f, 0.98917651f, + 0.98664333f, 0.98384601f, 0.98078528f, 0.97746197f, 0.97387698f, + 0.97003125f, 0.96592583f, 0.96156180f, 0.95694034f, 0.95206268f, + 0.94693013f, 0.94154407f, 0.93590593f, 0.93001722f, 0.92387953f, + 0.91749450f, 0.91086382f, 0.90398929f, 0.89687274f, 0.88951608f, + 0.88192126f, 0.87409034f, 0.86602540f, 0.85772861f, 0.84920218f, + 0.84044840f, 0.83146961f, 0.82226822f, 0.81284668f, 0.80320753f, + 0.79335334f, 0.78328675f, 0.77301045f, 0.76252720f, 0.75183981f, + 0.74095113f, 0.72986407f, 0.71858162f, 0.70710678f, 0.69544264f, + 0.68359230f, 0.67155895f, 0.65934582f, 0.64695615f, 0.63439328f, + 0.62166057f, 0.60876143f, 0.59569930f, 0.58247770f, 0.56910015f, + 0.55557023f, 0.54189158f, 0.52806785f, 0.51410274f, 0.50000000f, + 0.48576339f, 0.47139674f, 0.45690388f, 0.44228869f, 0.42755509f, + 0.41270703f, 0.39774847f, 0.38268343f, 0.36751594f, 0.35225005f, + 0.33688985f, 0.32143947f, 0.30590302f, 0.29028468f, 0.27458862f, + 0.25881905f, 0.24298018f, 0.22707626f, 0.21111155f, 0.19509032f, + 0.17901686f, 0.16289547f, 0.14673047f, 0.13052619f, 0.11428696f, + 0.09801714f, 0.08172107f, 0.06540313f, 0.04906767f, 0.03271908f, + 0.01636173f}; + +// hybrib Hanning & flat window: for 20ms +static const float kBlocks320w512[512] = { + 0.00000000f, 0.00818114f, 0.01636173f, 0.02454123f, 0.03271908f, + 0.04089475f, 0.04906767f, 0.05723732f, 0.06540313f, 0.07356456f, + 0.08172107f, 0.08987211f, 0.09801714f, 0.10615561f, 0.11428696f, + 0.12241068f, 0.13052619f, 0.13863297f, 0.14673047f, 0.15481816f, + 0.16289547f, 0.17096189f, 0.17901686f, 0.18705985f, 0.19509032f, + 0.20310773f, 0.21111155f, 0.21910124f, 0.22707626f, 0.23503609f, + 0.24298018f, 0.25090801f, 0.25881905f, 0.26671276f, 0.27458862f, + 0.28244610f, 0.29028468f, 0.29810383f, 0.30590302f, 0.31368174f, + 0.32143947f, 0.32917568f, 0.33688985f, 0.34458148f, 0.35225005f, + 0.35989504f, 0.36751594f, 0.37511224f, 0.38268343f, 0.39022901f, + 0.39774847f, 0.40524131f, 0.41270703f, 0.42014512f, 0.42755509f, + 0.43493645f, 0.44228869f, 0.44961133f, 0.45690388f, 0.46416584f, + 0.47139674f, 0.47859608f, 0.48576339f, 0.49289819f, 0.50000000f, + 0.50706834f, 0.51410274f, 0.52110274f, 0.52806785f, 0.53499762f, + 0.54189158f, 0.54874927f, 0.55557023f, 0.56235401f, 0.56910015f, + 0.57580819f, 0.58247770f, 0.58910822f, 0.59569930f, 0.60225052f, + 0.60876143f, 0.61523159f, 0.62166057f, 0.62804795f, 0.63439328f, + 0.64069616f, 0.64695615f, 0.65317284f, 0.65934582f, 0.66547466f, + 0.67155895f, 0.67759830f, 0.68359230f, 0.68954054f, 0.69544264f, + 0.70129818f, 0.70710678f, 0.71286806f, 0.71858162f, 0.72424708f, + 0.72986407f, 0.73543221f, 0.74095113f, 0.74642045f, 0.75183981f, + 0.75720885f, 0.76252720f, 0.76779452f, 0.77301045f, 0.77817464f, + 0.78328675f, 0.78834643f, 0.79335334f, 0.79830715f, 
0.80320753f, + 0.80805415f, 0.81284668f, 0.81758481f, 0.82226822f, 0.82689659f, + 0.83146961f, 0.83598698f, 0.84044840f, 0.84485357f, 0.84920218f, + 0.85349396f, 0.85772861f, 0.86190585f, 0.86602540f, 0.87008699f, + 0.87409034f, 0.87803519f, 0.88192126f, 0.88574831f, 0.88951608f, + 0.89322430f, 0.89687274f, 0.90046115f, 0.90398929f, 0.90745693f, + 0.91086382f, 0.91420976f, 0.91749450f, 0.92071783f, 0.92387953f, + 0.92697940f, 0.93001722f, 0.93299280f, 0.93590593f, 0.93875641f, + 0.94154407f, 0.94426870f, 0.94693013f, 0.94952818f, 0.95206268f, + 0.95453345f, 0.95694034f, 0.95928317f, 0.96156180f, 0.96377607f, + 0.96592583f, 0.96801094f, 0.97003125f, 0.97198664f, 0.97387698f, + 0.97570213f, 0.97746197f, 0.97915640f, 0.98078528f, 0.98234852f, + 0.98384601f, 0.98527764f, 0.98664333f, 0.98794298f, 0.98917651f, + 0.99034383f, 0.99144486f, 0.99247953f, 0.99344778f, 0.99434953f, + 0.99518473f, 0.99595331f, 0.99665524f, 0.99729046f, 0.99785892f, + 0.99836060f, 0.99879546f, 0.99916346f, 0.99946459f, 0.99969882f, + 0.99986614f, 0.99996653f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, + 1.00000000f, 0.99996653f, 0.99986614f, 0.99969882f, 0.99946459f, + 0.99916346f, 0.99879546f, 0.99836060f, 0.99785892f, 0.99729046f, + 0.99665524f, 0.99595331f, 0.99518473f, 0.99434953f, 0.99344778f, + 0.99247953f, 0.99144486f, 0.99034383f, 0.98917651f, 0.98794298f, + 0.98664333f, 0.98527764f, 0.98384601f, 0.98234852f, 0.98078528f, + 0.97915640f, 0.97746197f, 0.97570213f, 0.97387698f, 0.97198664f, + 0.97003125f, 0.96801094f, 0.96592583f, 0.96377607f, 0.96156180f, + 0.95928317f, 0.95694034f, 0.95453345f, 0.95206268f, 0.94952818f, + 0.94693013f, 0.94426870f, 0.94154407f, 0.93875641f, 0.93590593f, + 0.93299280f, 0.93001722f, 0.92697940f, 0.92387953f, 0.92071783f, + 0.91749450f, 0.91420976f, 0.91086382f, 0.90745693f, 0.90398929f, + 0.90046115f, 0.89687274f, 0.89322430f, 0.88951608f, 
0.88574831f, + 0.88192126f, 0.87803519f, 0.87409034f, 0.87008699f, 0.86602540f, + 0.86190585f, 0.85772861f, 0.85349396f, 0.84920218f, 0.84485357f, + 0.84044840f, 0.83598698f, 0.83146961f, 0.82689659f, 0.82226822f, + 0.81758481f, 0.81284668f, 0.80805415f, 0.80320753f, 0.79830715f, + 0.79335334f, 0.78834643f, 0.78328675f, 0.77817464f, 0.77301045f, + 0.76779452f, 0.76252720f, 0.75720885f, 0.75183981f, 0.74642045f, + 0.74095113f, 0.73543221f, 0.72986407f, 0.72424708f, 0.71858162f, + 0.71286806f, 0.70710678f, 0.70129818f, 0.69544264f, 0.68954054f, + 0.68359230f, 0.67759830f, 0.67155895f, 0.66547466f, 0.65934582f, + 0.65317284f, 0.64695615f, 0.64069616f, 0.63439328f, 0.62804795f, + 0.62166057f, 0.61523159f, 0.60876143f, 0.60225052f, 0.59569930f, + 0.58910822f, 0.58247770f, 0.57580819f, 0.56910015f, 0.56235401f, + 0.55557023f, 0.54874927f, 0.54189158f, 0.53499762f, 0.52806785f, + 0.52110274f, 0.51410274f, 0.50706834f, 0.50000000f, 0.49289819f, + 0.48576339f, 0.47859608f, 0.47139674f, 0.46416584f, 0.45690388f, + 0.44961133f, 0.44228869f, 0.43493645f, 0.42755509f, 0.42014512f, + 0.41270703f, 0.40524131f, 0.39774847f, 0.39022901f, 0.38268343f, + 0.37511224f, 0.36751594f, 0.35989504f, 0.35225005f, 0.34458148f, + 0.33688985f, 0.32917568f, 0.32143947f, 0.31368174f, 0.30590302f, + 0.29810383f, 0.29028468f, 0.28244610f, 0.27458862f, 0.26671276f, + 0.25881905f, 0.25090801f, 0.24298018f, 0.23503609f, 0.22707626f, + 0.21910124f, 0.21111155f, 0.20310773f, 0.19509032f, 0.18705985f, + 0.17901686f, 0.17096189f, 0.16289547f, 0.15481816f, 0.14673047f, + 0.13863297f, 0.13052619f, 0.12241068f, 0.11428696f, 0.10615561f, + 0.09801714f, 0.08987211f, 0.08172107f, 0.07356456f, 0.06540313f, + 0.05723732f, 0.04906767f, 0.04089475f, 0.03271908f, 0.02454123f, + 0.01636173f, 0.00818114f}; + +// Hanning window: for 15ms at 16kHz with symmetric zeros +static const float kBlocks240w512[512] = { + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00654494f, 0.01308960f, 0.01963369f, + 0.02617695f, 0.03271908f, 0.03925982f, 0.04579887f, 0.05233596f, + 0.05887080f, 0.06540313f, 0.07193266f, 0.07845910f, 0.08498218f, + 0.09150162f, 0.09801714f, 0.10452846f, 0.11103531f, 0.11753740f, + 0.12403446f, 0.13052620f, 0.13701233f, 0.14349262f, 0.14996676f, + 0.15643448f, 0.16289547f, 0.16934951f, 0.17579629f, 0.18223552f, + 0.18866697f, 0.19509032f, 0.20150533f, 0.20791170f, 0.21430916f, + 0.22069745f, 0.22707628f, 0.23344538f, 0.23980446f, 0.24615330f, + 0.25249159f, 0.25881904f, 0.26513544f, 0.27144045f, 0.27773386f, + 0.28401536f, 0.29028466f, 0.29654160f, 0.30278578f, 0.30901700f, + 0.31523499f, 0.32143945f, 0.32763019f, 0.33380687f, 0.33996925f, + 0.34611708f, 0.35225007f, 0.35836795f, 0.36447051f, 0.37055743f, + 0.37662852f, 0.38268346f, 0.38872197f, 0.39474389f, 0.40074885f, + 0.40673664f, 0.41270703f, 0.41865975f, 0.42459452f, 0.43051112f, + 0.43640924f, 0.44228873f, 0.44814920f, 0.45399052f, 0.45981237f, + 0.46561453f, 0.47139674f, 0.47715878f, 0.48290035f, 0.48862126f, + 0.49432120f, 0.50000000f, 0.50565743f, 0.51129311f, 0.51690692f, + 0.52249855f, 0.52806789f, 0.53361452f, 0.53913832f, 0.54463905f, + 0.55011642f, 0.55557024f, 0.56100029f, 0.56640625f, 0.57178795f, + 0.57714522f, 0.58247769f, 0.58778524f, 0.59306765f, 0.59832460f, + 0.60355598f, 0.60876143f, 0.61394083f, 0.61909395f, 0.62422055f, + 0.62932038f, 0.63439333f, 0.63943899f, 0.64445734f, 
0.64944810f, + 0.65441096f, 0.65934587f, 0.66425246f, 0.66913062f, 0.67398012f, + 0.67880076f, 0.68359232f, 0.68835455f, 0.69308740f, 0.69779050f, + 0.70246369f, 0.70710677f, 0.71171963f, 0.71630198f, 0.72085363f, + 0.72537440f, 0.72986406f, 0.73432255f, 0.73874950f, 0.74314487f, + 0.74750835f, 0.75183982f, 0.75613910f, 0.76040596f, 0.76464027f, + 0.76884186f, 0.77301043f, 0.77714598f, 0.78124821f, 0.78531694f, + 0.78935206f, 0.79335338f, 0.79732066f, 0.80125386f, 0.80515265f, + 0.80901700f, 0.81284672f, 0.81664157f, 0.82040149f, 0.82412618f, + 0.82781565f, 0.83146966f, 0.83508795f, 0.83867061f, 0.84221727f, + 0.84572780f, 0.84920216f, 0.85264021f, 0.85604161f, 0.85940641f, + 0.86273444f, 0.86602545f, 0.86927933f, 0.87249607f, 0.87567532f, + 0.87881714f, 0.88192129f, 0.88498765f, 0.88801610f, 0.89100653f, + 0.89395881f, 0.89687276f, 0.89974827f, 0.90258533f, 0.90538365f, + 0.90814316f, 0.91086388f, 0.91354549f, 0.91618794f, 0.91879123f, + 0.92135513f, 0.92387950f, 0.92636442f, 0.92880958f, 0.93121493f, + 0.93358046f, 0.93590593f, 0.93819135f, 0.94043654f, 0.94264150f, + 0.94480604f, 0.94693011f, 0.94901365f, 0.95105654f, 0.95305866f, + 0.95501995f, 0.95694035f, 0.95881975f, 0.96065807f, 0.96245527f, + 0.96421117f, 0.96592581f, 0.96759909f, 0.96923089f, 0.97082120f, + 0.97236991f, 0.97387701f, 0.97534233f, 0.97676587f, 0.97814763f, + 0.97948742f, 0.98078531f, 0.98204112f, 0.98325491f, 0.98442656f, + 0.98555607f, 0.98664331f, 0.98768836f, 0.98869103f, 0.98965138f, + 0.99056935f, 0.99144489f, 0.99227792f, 0.99306846f, 0.99381649f, + 0.99452192f, 0.99518472f, 0.99580491f, 0.99638247f, 0.99691731f, + 0.99740952f, 0.99785894f, 0.99826562f, 0.99862951f, 0.99895066f, + 0.99922901f, 0.99946457f, 0.99965733f, 0.99980724f, 0.99991435f, + 0.99997860f, 1.00000000f, 0.99997860f, 0.99991435f, 0.99980724f, + 0.99965733f, 0.99946457f, 0.99922901f, 0.99895066f, 0.99862951f, + 0.99826562f, 0.99785894f, 0.99740946f, 0.99691731f, 0.99638247f, + 0.99580491f, 0.99518472f, 0.99452192f, 0.99381644f, 0.99306846f, + 0.99227792f, 0.99144489f, 0.99056935f, 0.98965138f, 0.98869103f, + 0.98768836f, 0.98664331f, 0.98555607f, 0.98442656f, 0.98325491f, + 0.98204112f, 0.98078525f, 0.97948742f, 0.97814757f, 0.97676587f, + 0.97534227f, 0.97387695f, 0.97236991f, 0.97082120f, 0.96923089f, + 0.96759909f, 0.96592581f, 0.96421117f, 0.96245521f, 0.96065807f, + 0.95881969f, 0.95694029f, 0.95501995f, 0.95305860f, 0.95105648f, + 0.94901365f, 0.94693011f, 0.94480604f, 0.94264150f, 0.94043654f, + 0.93819129f, 0.93590593f, 0.93358046f, 0.93121493f, 0.92880952f, + 0.92636436f, 0.92387950f, 0.92135507f, 0.91879123f, 0.91618794f, + 0.91354543f, 0.91086382f, 0.90814310f, 0.90538365f, 0.90258527f, + 0.89974827f, 0.89687276f, 0.89395875f, 0.89100647f, 0.88801610f, + 0.88498759f, 0.88192123f, 0.87881714f, 0.87567532f, 0.87249595f, + 0.86927933f, 0.86602539f, 0.86273432f, 0.85940641f, 0.85604161f, + 0.85264009f, 0.84920216f, 0.84572780f, 0.84221715f, 0.83867055f, + 0.83508795f, 0.83146954f, 0.82781565f, 0.82412612f, 0.82040137f, + 0.81664157f, 0.81284660f, 0.80901700f, 0.80515265f, 0.80125374f, + 0.79732066f, 0.79335332f, 0.78935200f, 0.78531694f, 0.78124815f, + 0.77714586f, 0.77301049f, 0.76884180f, 0.76464021f, 0.76040596f, + 0.75613904f, 0.75183970f, 0.74750835f, 0.74314481f, 0.73874938f, + 0.73432249f, 0.72986400f, 0.72537428f, 0.72085363f, 0.71630186f, + 0.71171951f, 0.70710677f, 0.70246363f, 0.69779032f, 0.69308734f, + 0.68835449f, 0.68359220f, 0.67880070f, 0.67398006f, 0.66913044f, + 0.66425240f, 0.65934575f, 0.65441096f, 0.64944804f, 
0.64445722f, + 0.63943905f, 0.63439327f, 0.62932026f, 0.62422055f, 0.61909389f, + 0.61394072f, 0.60876143f, 0.60355592f, 0.59832448f, 0.59306765f, + 0.58778518f, 0.58247757f, 0.57714522f, 0.57178789f, 0.56640613f, + 0.56100023f, 0.55557019f, 0.55011630f, 0.54463905f, 0.53913826f, + 0.53361434f, 0.52806783f, 0.52249849f, 0.51690674f, 0.51129305f, + 0.50565726f, 0.50000006f, 0.49432117f, 0.48862115f, 0.48290038f, + 0.47715873f, 0.47139663f, 0.46561456f, 0.45981231f, 0.45399037f, + 0.44814920f, 0.44228864f, 0.43640912f, 0.43051112f, 0.42459446f, + 0.41865960f, 0.41270703f, 0.40673658f, 0.40074870f, 0.39474386f, + 0.38872188f, 0.38268328f, 0.37662849f, 0.37055734f, 0.36447033f, + 0.35836792f, 0.35224995f, 0.34611690f, 0.33996922f, 0.33380675f, + 0.32763001f, 0.32143945f, 0.31523487f, 0.30901679f, 0.30278572f, + 0.29654145f, 0.29028472f, 0.28401530f, 0.27773371f, 0.27144048f, + 0.26513538f, 0.25881892f, 0.25249159f, 0.24615324f, 0.23980433f, + 0.23344538f, 0.22707619f, 0.22069728f, 0.21430916f, 0.20791161f, + 0.20150517f, 0.19509031f, 0.18866688f, 0.18223536f, 0.17579627f, + 0.16934940f, 0.16289529f, 0.15643445f, 0.14996666f, 0.14349243f, + 0.13701232f, 0.13052608f, 0.12403426f, 0.11753736f, 0.11103519f, + 0.10452849f, 0.09801710f, 0.09150149f, 0.08498220f, 0.07845904f, + 0.07193252f, 0.06540315f, 0.05887074f, 0.05233581f, 0.04579888f, + 0.03925974f, 0.03271893f, 0.02617695f, 0.01963361f, 0.01308943f, + 0.00654493f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f}; + +// Hanning window: for 30ms with 1024 fft with symmetric zeros at 16kHz +static const float kBlocks480w1024[1024] = { + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, + 0.00000000f, 0.00000000f, 0.00000000f, 0.00327249f, 0.00654494f, + 0.00981732f, 0.01308960f, 0.01636173f, 0.01963369f, 0.02290544f, + 0.02617695f, 0.02944817f, 0.03271908f, 0.03598964f, 0.03925982f, + 0.04252957f, 0.04579887f, 0.04906768f, 0.05233596f, 0.05560368f, + 0.05887080f, 0.06213730f, 0.06540313f, 0.06866825f, 0.07193266f, + 0.07519628f, 0.07845910f, 0.08172107f, 0.08498218f, 0.08824237f, + 0.09150162f, 0.09475989f, 0.09801714f, 0.10127335f, 0.10452846f, + 0.10778246f, 0.11103531f, 0.11428697f, 0.11753740f, 0.12078657f, + 0.12403446f, 0.12728101f, 0.13052620f, 0.13376999f, 0.13701233f, + 0.14025325f, 0.14349262f, 0.14673047f, 0.14996676f, 0.15320145f, + 0.15643448f, 0.15966582f, 0.16289547f, 0.16612339f, 0.16934951f, + 0.17257382f, 0.17579629f, 0.17901687f, 0.18223552f, 0.18545224f, + 0.18866697f, 0.19187967f, 0.19509032f, 0.19829889f, 0.20150533f, + 0.20470962f, 0.20791170f, 0.21111156f, 0.21430916f, 0.21750447f, + 0.22069745f, 0.22388805f, 0.22707628f, 0.23026206f, 0.23344538f, + 0.23662618f, 0.23980446f, 0.24298020f, 0.24615330f, 0.24932377f, + 0.25249159f, 0.25565669f, 0.25881904f, 0.26197866f, 0.26513544f, + 0.26828939f, 0.27144045f, 0.27458861f, 0.27773386f, 0.28087610f, + 0.28401536f, 0.28715158f, 0.29028466f, 0.29341471f, 0.29654160f, + 0.29966527f, 0.30278578f, 0.30590302f, 0.30901700f, 0.31212768f, + 0.31523499f, 0.31833893f, 0.32143945f, 
0.32453656f, 0.32763019f, + 0.33072028f, 0.33380687f, 0.33688986f, 0.33996925f, 0.34304500f, + 0.34611708f, 0.34918544f, 0.35225007f, 0.35531089f, 0.35836795f, + 0.36142117f, 0.36447051f, 0.36751595f, 0.37055743f, 0.37359497f, + 0.37662852f, 0.37965801f, 0.38268346f, 0.38570479f, 0.38872197f, + 0.39173502f, 0.39474389f, 0.39774847f, 0.40074885f, 0.40374491f, + 0.40673664f, 0.40972406f, 0.41270703f, 0.41568562f, 0.41865975f, + 0.42162940f, 0.42459452f, 0.42755508f, 0.43051112f, 0.43346250f, + 0.43640924f, 0.43935132f, 0.44228873f, 0.44522133f, 0.44814920f, + 0.45107228f, 0.45399052f, 0.45690390f, 0.45981237f, 0.46271592f, + 0.46561453f, 0.46850815f, 0.47139674f, 0.47428030f, 0.47715878f, + 0.48003215f, 0.48290035f, 0.48576337f, 0.48862126f, 0.49147385f, + 0.49432120f, 0.49716330f, 0.50000000f, 0.50283140f, 0.50565743f, + 0.50847799f, 0.51129311f, 0.51410276f, 0.51690692f, 0.51970553f, + 0.52249855f, 0.52528602f, 0.52806789f, 0.53084403f, 0.53361452f, + 0.53637928f, 0.53913832f, 0.54189163f, 0.54463905f, 0.54738063f, + 0.55011642f, 0.55284631f, 0.55557024f, 0.55828828f, 0.56100029f, + 0.56370628f, 0.56640625f, 0.56910014f, 0.57178795f, 0.57446963f, + 0.57714522f, 0.57981455f, 0.58247769f, 0.58513463f, 0.58778524f, + 0.59042960f, 0.59306765f, 0.59569931f, 0.59832460f, 0.60094351f, + 0.60355598f, 0.60616195f, 0.60876143f, 0.61135441f, 0.61394083f, + 0.61652070f, 0.61909395f, 0.62166059f, 0.62422055f, 0.62677383f, + 0.62932038f, 0.63186020f, 0.63439333f, 0.63691956f, 0.63943899f, + 0.64195162f, 0.64445734f, 0.64695615f, 0.64944810f, 0.65193301f, + 0.65441096f, 0.65688187f, 0.65934587f, 0.66180271f, 0.66425246f, + 0.66669512f, 0.66913062f, 0.67155898f, 0.67398012f, 0.67639405f, + 0.67880076f, 0.68120021f, 0.68359232f, 0.68597710f, 0.68835455f, + 0.69072467f, 0.69308740f, 0.69544262f, 0.69779050f, 0.70013082f, + 0.70246369f, 0.70478904f, 0.70710677f, 0.70941699f, 0.71171963f, + 0.71401459f, 0.71630198f, 0.71858168f, 0.72085363f, 0.72311789f, + 0.72537440f, 0.72762316f, 0.72986406f, 0.73209721f, 0.73432255f, + 0.73653996f, 0.73874950f, 0.74095118f, 0.74314487f, 0.74533057f, + 0.74750835f, 0.74967808f, 0.75183982f, 0.75399351f, 0.75613910f, + 0.75827658f, 0.76040596f, 0.76252723f, 0.76464027f, 0.76674515f, + 0.76884186f, 0.77093029f, 0.77301043f, 0.77508241f, 0.77714598f, + 0.77920127f, 0.78124821f, 0.78328675f, 0.78531694f, 0.78733873f, + 0.78935206f, 0.79135692f, 0.79335338f, 0.79534125f, 0.79732066f, + 0.79929149f, 0.80125386f, 0.80320752f, 0.80515265f, 0.80708915f, + 0.80901700f, 0.81093621f, 0.81284672f, 0.81474853f, 0.81664157f, + 0.81852591f, 0.82040149f, 0.82226825f, 0.82412618f, 0.82597536f, + 0.82781565f, 0.82964706f, 0.83146966f, 0.83328325f, 0.83508795f, + 0.83688378f, 0.83867061f, 0.84044838f, 0.84221727f, 0.84397703f, + 0.84572780f, 0.84746957f, 0.84920216f, 0.85092574f, 0.85264021f, + 0.85434544f, 0.85604161f, 0.85772866f, 0.85940641f, 0.86107504f, + 0.86273444f, 0.86438453f, 0.86602545f, 0.86765707f, 0.86927933f, + 0.87089235f, 0.87249607f, 0.87409031f, 0.87567532f, 0.87725097f, + 0.87881714f, 0.88037390f, 0.88192129f, 0.88345921f, 0.88498765f, + 0.88650668f, 0.88801610f, 0.88951612f, 0.89100653f, 0.89248741f, + 0.89395881f, 0.89542055f, 0.89687276f, 0.89831537f, 0.89974827f, + 0.90117162f, 0.90258533f, 0.90398932f, 0.90538365f, 0.90676826f, + 0.90814316f, 0.90950841f, 0.91086388f, 0.91220951f, 0.91354549f, + 0.91487163f, 0.91618794f, 0.91749454f, 0.91879123f, 0.92007810f, + 0.92135513f, 0.92262226f, 0.92387950f, 0.92512691f, 0.92636442f, + 0.92759192f, 0.92880958f, 0.93001723f, 
0.93121493f, 0.93240267f, + 0.93358046f, 0.93474817f, 0.93590593f, 0.93705362f, 0.93819135f, + 0.93931901f, 0.94043654f, 0.94154406f, 0.94264150f, 0.94372880f, + 0.94480604f, 0.94587320f, 0.94693011f, 0.94797695f, 0.94901365f, + 0.95004016f, 0.95105654f, 0.95206273f, 0.95305866f, 0.95404440f, + 0.95501995f, 0.95598525f, 0.95694035f, 0.95788521f, 0.95881975f, + 0.95974404f, 0.96065807f, 0.96156180f, 0.96245527f, 0.96333838f, + 0.96421117f, 0.96507370f, 0.96592581f, 0.96676767f, 0.96759909f, + 0.96842021f, 0.96923089f, 0.97003126f, 0.97082120f, 0.97160077f, + 0.97236991f, 0.97312868f, 0.97387701f, 0.97461486f, 0.97534233f, + 0.97605932f, 0.97676587f, 0.97746199f, 0.97814763f, 0.97882277f, + 0.97948742f, 0.98014158f, 0.98078531f, 0.98141843f, 0.98204112f, + 0.98265332f, 0.98325491f, 0.98384601f, 0.98442656f, 0.98499662f, + 0.98555607f, 0.98610497f, 0.98664331f, 0.98717111f, 0.98768836f, + 0.98819500f, 0.98869103f, 0.98917651f, 0.98965138f, 0.99011570f, + 0.99056935f, 0.99101239f, 0.99144489f, 0.99186671f, 0.99227792f, + 0.99267852f, 0.99306846f, 0.99344778f, 0.99381649f, 0.99417448f, + 0.99452192f, 0.99485862f, 0.99518472f, 0.99550015f, 0.99580491f, + 0.99609905f, 0.99638247f, 0.99665523f, 0.99691731f, 0.99716878f, + 0.99740952f, 0.99763954f, 0.99785894f, 0.99806762f, 0.99826562f, + 0.99845290f, 0.99862951f, 0.99879545f, 0.99895066f, 0.99909520f, + 0.99922901f, 0.99935216f, 0.99946457f, 0.99956632f, 0.99965733f, + 0.99973762f, 0.99980724f, 0.99986613f, 0.99991435f, 0.99995178f, + 0.99997860f, 0.99999464f, 1.00000000f, 0.99999464f, 0.99997860f, + 0.99995178f, 0.99991435f, 0.99986613f, 0.99980724f, 0.99973762f, + 0.99965733f, 0.99956632f, 0.99946457f, 0.99935216f, 0.99922901f, + 0.99909520f, 0.99895066f, 0.99879545f, 0.99862951f, 0.99845290f, + 0.99826562f, 0.99806762f, 0.99785894f, 0.99763954f, 0.99740946f, + 0.99716872f, 0.99691731f, 0.99665523f, 0.99638247f, 0.99609905f, + 0.99580491f, 0.99550015f, 0.99518472f, 0.99485862f, 0.99452192f, + 0.99417448f, 0.99381644f, 0.99344778f, 0.99306846f, 0.99267852f, + 0.99227792f, 0.99186671f, 0.99144489f, 0.99101239f, 0.99056935f, + 0.99011564f, 0.98965138f, 0.98917651f, 0.98869103f, 0.98819494f, + 0.98768836f, 0.98717111f, 0.98664331f, 0.98610497f, 0.98555607f, + 0.98499656f, 0.98442656f, 0.98384601f, 0.98325491f, 0.98265326f, + 0.98204112f, 0.98141843f, 0.98078525f, 0.98014158f, 0.97948742f, + 0.97882277f, 0.97814757f, 0.97746193f, 0.97676587f, 0.97605932f, + 0.97534227f, 0.97461486f, 0.97387695f, 0.97312862f, 0.97236991f, + 0.97160077f, 0.97082120f, 0.97003126f, 0.96923089f, 0.96842015f, + 0.96759909f, 0.96676761f, 0.96592581f, 0.96507365f, 0.96421117f, + 0.96333838f, 0.96245521f, 0.96156180f, 0.96065807f, 0.95974404f, + 0.95881969f, 0.95788515f, 0.95694029f, 0.95598525f, 0.95501995f, + 0.95404440f, 0.95305860f, 0.95206267f, 0.95105648f, 0.95004016f, + 0.94901365f, 0.94797695f, 0.94693011f, 0.94587314f, 0.94480604f, + 0.94372880f, 0.94264150f, 0.94154406f, 0.94043654f, 0.93931895f, + 0.93819129f, 0.93705362f, 0.93590593f, 0.93474817f, 0.93358046f, + 0.93240267f, 0.93121493f, 0.93001723f, 0.92880952f, 0.92759192f, + 0.92636436f, 0.92512691f, 0.92387950f, 0.92262226f, 0.92135507f, + 0.92007804f, 0.91879123f, 0.91749448f, 0.91618794f, 0.91487157f, + 0.91354543f, 0.91220951f, 0.91086382f, 0.90950835f, 0.90814310f, + 0.90676820f, 0.90538365f, 0.90398932f, 0.90258527f, 0.90117157f, + 0.89974827f, 0.89831525f, 0.89687276f, 0.89542055f, 0.89395875f, + 0.89248741f, 0.89100647f, 0.88951600f, 0.88801610f, 0.88650662f, + 0.88498759f, 0.88345915f, 0.88192123f, 
0.88037384f, 0.87881714f, + 0.87725091f, 0.87567532f, 0.87409031f, 0.87249595f, 0.87089223f, + 0.86927933f, 0.86765701f, 0.86602539f, 0.86438447f, 0.86273432f, + 0.86107504f, 0.85940641f, 0.85772860f, 0.85604161f, 0.85434544f, + 0.85264009f, 0.85092574f, 0.84920216f, 0.84746951f, 0.84572780f, + 0.84397697f, 0.84221715f, 0.84044844f, 0.83867055f, 0.83688372f, + 0.83508795f, 0.83328319f, 0.83146954f, 0.82964706f, 0.82781565f, + 0.82597530f, 0.82412612f, 0.82226813f, 0.82040137f, 0.81852591f, + 0.81664157f, 0.81474847f, 0.81284660f, 0.81093609f, 0.80901700f, + 0.80708915f, 0.80515265f, 0.80320752f, 0.80125374f, 0.79929143f, + 0.79732066f, 0.79534125f, 0.79335332f, 0.79135686f, 0.78935200f, + 0.78733861f, 0.78531694f, 0.78328675f, 0.78124815f, 0.77920121f, + 0.77714586f, 0.77508223f, 0.77301049f, 0.77093029f, 0.76884180f, + 0.76674509f, 0.76464021f, 0.76252711f, 0.76040596f, 0.75827658f, + 0.75613904f, 0.75399339f, 0.75183970f, 0.74967796f, 0.74750835f, + 0.74533057f, 0.74314481f, 0.74095106f, 0.73874938f, 0.73653996f, + 0.73432249f, 0.73209721f, 0.72986400f, 0.72762305f, 0.72537428f, + 0.72311789f, 0.72085363f, 0.71858162f, 0.71630186f, 0.71401453f, + 0.71171951f, 0.70941705f, 0.70710677f, 0.70478898f, 0.70246363f, + 0.70013070f, 0.69779032f, 0.69544268f, 0.69308734f, 0.69072461f, + 0.68835449f, 0.68597704f, 0.68359220f, 0.68120021f, 0.67880070f, + 0.67639399f, 0.67398006f, 0.67155886f, 0.66913044f, 0.66669512f, + 0.66425240f, 0.66180259f, 0.65934575f, 0.65688181f, 0.65441096f, + 0.65193301f, 0.64944804f, 0.64695609f, 0.64445722f, 0.64195150f, + 0.63943905f, 0.63691956f, 0.63439327f, 0.63186014f, 0.62932026f, + 0.62677372f, 0.62422055f, 0.62166059f, 0.61909389f, 0.61652064f, + 0.61394072f, 0.61135429f, 0.60876143f, 0.60616189f, 0.60355592f, + 0.60094339f, 0.59832448f, 0.59569913f, 0.59306765f, 0.59042960f, + 0.58778518f, 0.58513451f, 0.58247757f, 0.57981461f, 0.57714522f, + 0.57446963f, 0.57178789f, 0.56910002f, 0.56640613f, 0.56370628f, + 0.56100023f, 0.55828822f, 0.55557019f, 0.55284619f, 0.55011630f, + 0.54738069f, 0.54463905f, 0.54189152f, 0.53913826f, 0.53637916f, + 0.53361434f, 0.53084403f, 0.52806783f, 0.52528596f, 0.52249849f, + 0.51970541f, 0.51690674f, 0.51410276f, 0.51129305f, 0.50847787f, + 0.50565726f, 0.50283122f, 0.50000006f, 0.49716327f, 0.49432117f, + 0.49147379f, 0.48862115f, 0.48576325f, 0.48290038f, 0.48003212f, + 0.47715873f, 0.47428021f, 0.47139663f, 0.46850798f, 0.46561456f, + 0.46271589f, 0.45981231f, 0.45690379f, 0.45399037f, 0.45107210f, + 0.44814920f, 0.44522130f, 0.44228864f, 0.43935123f, 0.43640912f, + 0.43346232f, 0.43051112f, 0.42755505f, 0.42459446f, 0.42162928f, + 0.41865960f, 0.41568545f, 0.41270703f, 0.40972400f, 0.40673658f, + 0.40374479f, 0.40074870f, 0.39774850f, 0.39474386f, 0.39173496f, + 0.38872188f, 0.38570464f, 0.38268328f, 0.37965804f, 0.37662849f, + 0.37359491f, 0.37055734f, 0.36751580f, 0.36447033f, 0.36142117f, + 0.35836792f, 0.35531086f, 0.35224995f, 0.34918529f, 0.34611690f, + 0.34304500f, 0.33996922f, 0.33688980f, 0.33380675f, 0.33072016f, + 0.32763001f, 0.32453656f, 0.32143945f, 0.31833887f, 0.31523487f, + 0.31212750f, 0.30901679f, 0.30590302f, 0.30278572f, 0.29966521f, + 0.29654145f, 0.29341453f, 0.29028472f, 0.28715155f, 0.28401530f, + 0.28087601f, 0.27773371f, 0.27458847f, 0.27144048f, 0.26828936f, + 0.26513538f, 0.26197854f, 0.25881892f, 0.25565651f, 0.25249159f, + 0.24932374f, 0.24615324f, 0.24298008f, 0.23980433f, 0.23662600f, + 0.23344538f, 0.23026201f, 0.22707619f, 0.22388794f, 0.22069728f, + 0.21750426f, 0.21430916f, 0.21111152f, 
0.20791161f, 0.20470949f,
+    0.20150517f, 0.19829892f, 0.19509031f, 0.19187963f, 0.18866688f,
+    0.18545210f, 0.18223536f, 0.17901689f, 0.17579627f, 0.17257376f,
+    0.16934940f, 0.16612324f, 0.16289529f, 0.15966584f, 0.15643445f,
+    0.15320137f, 0.14996666f, 0.14673033f, 0.14349243f, 0.14025325f,
+    0.13701232f, 0.13376991f, 0.13052608f, 0.12728085f, 0.12403426f,
+    0.12078657f, 0.11753736f, 0.11428688f, 0.11103519f, 0.10778230f,
+    0.10452849f, 0.10127334f, 0.09801710f, 0.09475980f, 0.09150149f,
+    0.08824220f, 0.08498220f, 0.08172106f, 0.07845904f, 0.07519618f,
+    0.07193252f, 0.06866808f, 0.06540315f, 0.06213728f, 0.05887074f,
+    0.05560357f, 0.05233581f, 0.04906749f, 0.04579888f, 0.04252954f,
+    0.03925974f, 0.03598953f, 0.03271893f, 0.02944798f, 0.02617695f,
+    0.02290541f, 0.01963361f, 0.01636161f, 0.01308943f, 0.00981712f,
+    0.00654493f, 0.00327244f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+    0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc
new file mode 100644
index 0000000000..2e0ee7e5b7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_node.h"
+
+#include <math.h>
+#include <string.h>
+
+#include "common_audio/fir_filter.h"
+#include "common_audio/fir_filter_factory.h"
+#include "modules/audio_processing/transient/dyadic_decimator.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+WPDNode::WPDNode(size_t length,
+                 const float* coefficients,
+                 size_t coefficients_length)
+    : // The data buffer has parent data length to be able to contain and
+      // filter it.
+      data_(new float[2 * length + 1]),
+      length_(length),
+      filter_(
+          CreateFirFilter(coefficients, coefficients_length, 2 * length + 1)) {
+  RTC_DCHECK_GT(length, 0);
+  RTC_DCHECK(coefficients);
+  RTC_DCHECK_GT(coefficients_length, 0);
+  memset(data_.get(), 0.f, (2 * length + 1) * sizeof(data_[0]));
+}
+
+WPDNode::~WPDNode() {}
+
+int WPDNode::Update(const float* parent_data, size_t parent_data_length) {
+  if (!parent_data || (parent_data_length / 2) != length_) {
+    return -1;
+  }
+
+  // Filter data.
+  filter_->Filter(parent_data, parent_data_length, data_.get());
+
+  // Decimate data.
+  const bool kOddSequence = true;
+  size_t output_samples = DyadicDecimate(data_.get(), parent_data_length,
+                                         kOddSequence, data_.get(), length_);
+  if (output_samples != length_) {
+    return -1;
+  }
+
+  // Take the absolute value of all the values.
+  for (size_t i = 0; i < length_; ++i) {
+    data_[i] = fabs(data_[i]);
+  }
+
+  return 0;
+}
+
+int WPDNode::set_data(const float* new_data, size_t length) {
+  if (!new_data || length != length_) {
+    return -1;
+  }
+  memcpy(data_.get(), new_data, length * sizeof(data_[0]));
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.h b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.h
new file mode 100644
index 0000000000..41614fab0f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+
+#include <memory>
+
+namespace webrtc {
+
+class FIRFilter;
+
+// A single node of a Wavelet Packet Decomposition (WPD) tree.
+class WPDNode {
+ public:
+  // Creates a WPDNode. The data vector will contain zeros. The filter will
+  // have the coefficients provided.
+  WPDNode(size_t length, const float* coefficients, size_t coefficients_length);
+  ~WPDNode();
+
+  // Updates the node data. `parent_data_length` / 2 must be equal to
+  // `length_`. Returns 0 if correct, and -1 otherwise.
+  int Update(const float* parent_data, size_t parent_data_length);
+
+  const float* data() const { return data_.get(); }
+  // Returns 0 if correct, and -1 otherwise.
+  int set_data(const float* new_data, size_t length);
+  size_t length() const { return length_; }
+
+ private:
+  std::unique_ptr<float[]> data_;
+  size_t length_;
+  std::unique_ptr<FIRFilter> filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_node_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node_unittest.cc
new file mode 100644
index 0000000000..5f9238255c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_node_unittest.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_node.h"
+
+#include <string.h>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const size_t kDataLength = 5;
+static const float kTolerance = 0.0001f;
+
+static const size_t kParentDataLength = kDataLength * 2;
+static const float kParentData[kParentDataLength] = {1.f, 2.f, 3.f, 4.f, 5.f,
+                                                     6.f, 7.f, 8.f, 9.f, 10.f};
+
+static const float kCoefficients[] = {0.2f, -0.3f, 0.5f, -0.7f, 0.11f};
+static const size_t kCoefficientsLength =
+    sizeof(kCoefficients) / sizeof(kCoefficients[0]);
+
+TEST(WPDNodeTest, Accessors) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(0, node.set_data(kParentData, kDataLength));
+  EXPECT_EQ(0, memcmp(node.data(), kParentData,
+                      kDataLength * sizeof(node.data()[0])));
+}
+
+TEST(WPDNodeTest, UpdateThatOnlyDecimates) {
+  const float kIdentityCoefficient = 1.f;
+  WPDNode node(kDataLength, &kIdentityCoefficient, 1);
+  EXPECT_EQ(0, node.Update(kParentData, kParentDataLength));
+  for (size_t i = 0; i < kDataLength; ++i) {
+    EXPECT_FLOAT_EQ(kParentData[i * 2 + 1], node.data()[i]);
+  }
+}
+
+TEST(WPDNodeTest, UpdateWithArbitraryDataAndArbitraryFilter) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(0, node.Update(kParentData, kParentDataLength));
+  EXPECT_NEAR(0.1f, node.data()[0], kTolerance);
+  EXPECT_NEAR(0.2f, node.data()[1], kTolerance);
+  EXPECT_NEAR(0.18f, node.data()[2], kTolerance);
+  EXPECT_NEAR(0.56f, node.data()[3], kTolerance);
+  EXPECT_NEAR(0.94f, node.data()[4], kTolerance);
+}
+
+TEST(WPDNodeTest, ExpectedErrorReturnValue) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(-1, node.Update(kParentData, kParentDataLength - 1));
+  EXPECT_EQ(-1, node.Update(NULL, kParentDataLength));
+  EXPECT_EQ(-1, node.set_data(kParentData, kDataLength - 1));
+  EXPECT_EQ(-1, node.set_data(NULL, kDataLength));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc
new file mode 100644
index 0000000000..c8aa615881
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_tree.h"
+
+#include <string.h>
+
+#include "modules/audio_processing/transient/wpd_node.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+WPDTree::WPDTree(size_t data_length,
+                 const float* high_pass_coefficients,
+                 const float* low_pass_coefficients,
+                 size_t coefficients_length,
+                 int levels)
+    : data_length_(data_length),
+      levels_(levels),
+      num_nodes_((1 << (levels + 1)) - 1) {
+  RTC_DCHECK_GT(data_length, (static_cast<size_t>(1) << levels));
+  RTC_DCHECK(high_pass_coefficients);
+  RTC_DCHECK(low_pass_coefficients);
+  RTC_DCHECK_GT(levels, 0);
+  // Size is 1 more, so we can use the array as 1-based. nodes_[0] is never
+  // allocated.
+  nodes_.reset(new std::unique_ptr<WPDNode>[num_nodes_ + 1]);
+
+  // Create the first node.
+  const float kRootCoefficient = 1.f;  // Identity Coefficient.
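+  // With the 1-based layout built below, the root lives at nodes_[1], the
+  // children of nodes_[k] live at nodes_[2 * k] and nodes_[2 * k + 1], and
+  // level L occupies indices [1 << L, (1 << (L + 1)) - 1]. For example, with
+  // levels == 3 the tree uses indices 1 through 15 and its 8 leaves sit at
+  // indices 8 through 15.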
+  nodes_[1].reset(new WPDNode(data_length, &kRootCoefficient, 1));
+  // Variables used to create the rest of the nodes.
+  size_t index = 1;
+  size_t index_left_child = 0;
+  size_t index_right_child = 0;
+
+  int num_nodes_at_curr_level = 0;
+
+  // Branch each node in each level to create its children. The last level is
+  // not branched (all the nodes of that level are leaves).
+  for (int current_level = 0; current_level < levels; ++current_level) {
+    num_nodes_at_curr_level = 1 << current_level;
+    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
+      index = (1 << current_level) + i;
+      // Obtain the index of the current node children.
+      index_left_child = index * 2;
+      index_right_child = index_left_child + 1;
+      nodes_[index_left_child].reset(new WPDNode(nodes_[index]->length() / 2,
+                                                 low_pass_coefficients,
+                                                 coefficients_length));
+      nodes_[index_right_child].reset(new WPDNode(nodes_[index]->length() / 2,
+                                                  high_pass_coefficients,
+                                                  coefficients_length));
+    }
+  }
+}
+
+WPDTree::~WPDTree() {}
+
+WPDNode* WPDTree::NodeAt(int level, int index) {
+  if (level < 0 || level > levels_ || index < 0 || index >= 1 << level) {
+    return NULL;
+  }
+
+  return nodes_[(1 << level) + index].get();
+}
+
+int WPDTree::Update(const float* data, size_t data_length) {
+  if (!data || data_length != data_length_) {
+    return -1;
+  }
+
+  // Update the root node.
+  int update_result = nodes_[1]->set_data(data, data_length);
+  if (update_result != 0) {
+    return -1;
+  }
+
+  // Variables used to update the rest of the nodes.
+  size_t index = 1;
+  size_t index_left_child = 0;
+  size_t index_right_child = 0;
+
+  int num_nodes_at_curr_level = 0;
+
+  for (int current_level = 0; current_level < levels_; ++current_level) {
+    num_nodes_at_curr_level = 1 << current_level;
+    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
+      index = (1 << current_level) + i;
+      // Obtain the index of the current node children.
+      index_left_child = index * 2;
+      index_right_child = index_left_child + 1;
+
+      update_result = nodes_[index_left_child]->Update(nodes_[index]->data(),
+                                                       nodes_[index]->length());
+      if (update_result != 0) {
+        return -1;
+      }
+
+      update_result = nodes_[index_right_child]->Update(
+          nodes_[index]->data(), nodes_[index]->length());
+      if (update_result != 0) {
+        return -1;
+      }
+    }
+  }
+
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.h b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.h
new file mode 100644
index 0000000000..13cb8d9c2f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
+
+#include <stddef.h>
+
+#include <memory>
+
+#include "modules/audio_processing/transient/wpd_node.h"
+
+namespace webrtc {
+
+// Tree of a Wavelet Packet Decomposition (WPD).
+//
+// The root node contains all the data provided; for each node in the tree, the
+// left child contains the approximation coefficients extracted from the node,
+// and the right child contains the detail coefficients.
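+// For example, decomposing a 160-sample block with levels = 3 yields eight
+// leaves of 160 / 2^3 = 20 coefficients each, since every level halves the
+// data length of its parent.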
+// It preserves its state, so it can be called multiple times.
+//
+// The number of nodes in the tree is 2 ^ (levels + 1) - 1.
+//
+// Implementation details: Since the tree will always be a complete binary
+// tree, it is implemented using a single linear array instead of managing the
+// relationships in each node. For convenience it is better to use an array
+// that starts at 1 (instead of 0). Taking that into account, the following
+// formulas apply:
+// Root node index: 1.
+// Node(Level, Index in that level): 2 ^ Level + (Index in that level).
+// Left Child: Current node index * 2.
+// Right Child: Current node index * 2 + 1.
+// Parent: Current Node Index / 2 (Integer division).
+class WPDTree {
+ public:
+  // Creates a WPD tree using the data length and coefficients provided.
+  WPDTree(size_t data_length,
+          const float* high_pass_coefficients,
+          const float* low_pass_coefficients,
+          size_t coefficients_length,
+          int levels);
+  ~WPDTree();
+
+  // Returns the number of nodes at any given level.
+  static int NumberOfNodesAtLevel(int level) { return 1 << level; }
+
+  // Returns a pointer to the node at the given level and index (of that
+  // level).
+  // Level goes from 0 to levels().
+  // Index goes from 0 to NumberOfNodesAtLevel(level) - 1.
+  //
+  // You can use the following formulas to get any node within the tree:
+  // Notation: (Level, Index of node in that level).
+  // Root node: (0, 0).
+  // Left Child: (Current node level + 1, Current node index * 2).
+  // Right Child: (Current node level + 1, Current node index * 2 + 1).
+  // Parent: (Current node level - 1, Current node index / 2) (Integer
+  // division).
+  //
+  // If level or index are out of bounds the function will return NULL.
+  WPDNode* NodeAt(int level, int index);
+
+  // Updates all the nodes of the tree with the new data. `data_length` must
+  // be the same as was used for the creation of the tree.
+  // Returns 0 if correct, and -1 otherwise.
+  int Update(const float* data, size_t data_length);
+
+  // Returns the total number of levels below the root. The root is considered
+  // level 0.
+  int levels() const { return levels_; }
+
+  // Returns the total number of nodes.
+  int num_nodes() const { return num_nodes_; }
+
+  // Returns the total number of leaves.
+  int num_leaves() const { return 1 << levels_; }
+
+ private:
+  size_t data_length_;
+  int levels_;
+  int num_nodes_;
+  std::unique_ptr<std::unique_ptr<WPDNode>[]> nodes_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree_unittest.cc b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree_unittest.cc
new file mode 100644
index 0000000000..bf3ff987d7
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/transient/wpd_tree_unittest.cc
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_tree.h"
+
+#include <string.h>
+
+#include <string>
+
+#include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
+#include "modules/audio_processing/transient/file_utils.h"
+#include "rtc_base/strings/string_builder.h"
+#include "rtc_base/system/file_wrapper.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+TEST(WPDTreeTest, Construction) {
+  const size_t kTestBufferSize = 100;
+  const int kLevels = 5;
+  const int kExpectedNumberOfNodes = (1 << (kLevels + 1)) - 1;
+
+  float test_buffer[kTestBufferSize];
+  memset(test_buffer, 0.f, kTestBufferSize * sizeof(*test_buffer));
+  float test_coefficients[] = {1.f, 2.f, 3.f, 4.f, 5.f};
+  const size_t kTestCoefficientsLength =
+      sizeof(test_coefficients) / sizeof(test_coefficients[0]);
+  WPDTree tree(kTestBufferSize, test_coefficients, test_coefficients,
+               kTestCoefficientsLength, kLevels);
+  ASSERT_EQ(kExpectedNumberOfNodes, tree.num_nodes());
+  // Checks for NodeAt(level, index).
+  int nodes_at_level = 0;
+  for (int level = 0; level <= kLevels; ++level) {
+    nodes_at_level = 1 << level;
+    for (int i = 0; i < nodes_at_level; ++i) {
+      ASSERT_TRUE(NULL != tree.NodeAt(level, i));
+    }
+    // Out of bounds.
+    EXPECT_EQ(NULL, tree.NodeAt(level, -1));
+    EXPECT_EQ(NULL, tree.NodeAt(level, -12));
+    EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level));
+    EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level + 5));
+  }
+  // Out of bounds.
+  EXPECT_EQ(NULL, tree.NodeAt(-1, 0));
+  EXPECT_EQ(NULL, tree.NodeAt(-12, 0));
+  EXPECT_EQ(NULL, tree.NodeAt(kLevels + 1, 0));
+  EXPECT_EQ(NULL, tree.NodeAt(kLevels + 5, 0));
+  // Checks for Update().
+  EXPECT_EQ(0, tree.Update(test_buffer, kTestBufferSize));
+  EXPECT_EQ(-1, tree.Update(NULL, kTestBufferSize));
+  EXPECT_EQ(-1, tree.Update(test_buffer, kTestBufferSize - 1));
+}
+
+// This test is for the correctness of the tree.
+// It checks the results against the Matlab equivalent by comparing them with
+// the results stored in the output files from Matlab.
+// It also writes the results in its own set of files in the out directory.
+// Matlab and output files contain all the results in double precision (little
+// endian), appended.
+#if defined(WEBRTC_IOS)
+TEST(WPDTreeTest, DISABLED_CorrectnessBasedOnMatlabFiles) {
+#else
+TEST(WPDTreeTest, CorrectnessBasedOnMatlabFiles) {
+#endif
+  // 10 ms at 16000 Hz.
+  const size_t kTestBufferSize = 160;
+  const int kLevels = 3;
+  const int kLeaves = 1 << kLevels;
+  const size_t kLeavesSamples = kTestBufferSize >> kLevels;
+  // Create a tree with the Daubechies 8 wavelet coefficients.
+  WPDTree tree(kTestBufferSize, kDaubechies8HighPassCoefficients,
+               kDaubechies8LowPassCoefficients, kDaubechies8CoefficientsLength,
+               kLevels);
+  // Allocate and open all matlab and out files.
+  FileWrapper matlab_files_data[kLeaves];
+  FileWrapper out_files_data[kLeaves];
+
+  for (int i = 0; i < kLeaves; ++i) {
+    // Matlab files.
+    rtc::StringBuilder matlab_stream;
+    matlab_stream << "audio_processing/transient/wpd" << i;
+    std::string matlab_string = test::ResourcePath(matlab_stream.str(), "dat");
+    matlab_files_data[i] = FileWrapper::OpenReadOnly(matlab_string);
+
+    bool file_opened = matlab_files_data[i].is_open();
+    ASSERT_TRUE(file_opened) << "File could not be opened.\n" << matlab_string;
+
+    // Out files.
+    rtc::StringBuilder out_stream;
+    out_stream << test::OutputPath() << "wpd_" << i << ".out";
+    std::string out_string = out_stream.str();
+
+    out_files_data[i] = FileWrapper::OpenWriteOnly(out_string);
+
+    file_opened = out_files_data[i].is_open();
+    ASSERT_TRUE(file_opened) << "File could not be opened.\n" << out_string;
+  }
+
+  // Prepare the test file.
+  std::string test_file_name = test::ResourcePath(
+      "audio_processing/transient/ajm-macbook-1-spke16m", "pcm");
+
+  FileWrapper test_file = FileWrapper::OpenReadOnly(test_file_name);
+
+  bool file_opened = test_file.is_open();
+  ASSERT_TRUE(file_opened) << "File could not be opened.\n" << test_file_name;
+
+  float test_buffer[kTestBufferSize];
+
+  // Only the first frames of the audio file are tested. The matlab files also
+  // only contain information about the first frames.
+  const size_t kMaxFramesToTest = 100;
+  const float kTolerance = 0.03f;
+
+  size_t frames_read = 0;
+
+  // Read first buffer from the PCM test file.
+  size_t file_samples_read =
+      ReadInt16FromFileToFloatBuffer(&test_file, kTestBufferSize, test_buffer);
+  while (file_samples_read > 0 && frames_read < kMaxFramesToTest) {
+    ++frames_read;
+
+    if (file_samples_read < kTestBufferSize) {
+      // Pad the rest of the buffer with zeros.
+      for (size_t i = file_samples_read; i < kTestBufferSize; ++i) {
+        test_buffer[i] = 0.0;
+      }
+    }
+    tree.Update(test_buffer, kTestBufferSize);
+    double matlab_buffer[kTestBufferSize];
+
+    // Compare results with data from the matlab test files.
+    for (int i = 0; i < kLeaves; ++i) {
+      // Compare data values.
+      size_t matlab_samples_read = ReadDoubleBufferFromFile(
+          &matlab_files_data[i], kLeavesSamples, matlab_buffer);
+
+      ASSERT_EQ(kLeavesSamples, matlab_samples_read)
+          << "Matlab test files are malformed.\n"
+             "File: 3_"
+          << i;
+      // Get output data from the corresponding node.
+      const float* node_data = tree.NodeAt(kLevels, i)->data();
+      // Compare with matlab files.
+      for (size_t j = 0; j < kLeavesSamples; ++j) {
+        EXPECT_NEAR(matlab_buffer[j], node_data[j], kTolerance)
+            << "\nLeaf: " << i << "\nSample: " << j
+            << "\nFrame: " << frames_read - 1;
+      }
+
+      // Write results to out files.
+      WriteFloatBufferToFile(&out_files_data[i], kLeavesSamples, node_data);
+    }
+
+    // Read next buffer from the PCM test file.
+    file_samples_read = ReadInt16FromFileToFloatBuffer(
+        &test_file, kTestBufferSize, test_buffer);
+  }
+
+  // Close all matlab and out files.
+  for (int i = 0; i < kLeaves; ++i) {
+    matlab_files_data[i].Close();
+    out_files_data[i].Close();
+  }
+
+  test_file.Close();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/utility/BUILD.gn
new file mode 100644
index 0000000000..4851e77b03
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/BUILD.gn
@@ -0,0 +1,79 @@
+# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../webrtc.gni")
+
+rtc_library("cascaded_biquad_filter") {
+  sources = [
+    "cascaded_biquad_filter.cc",
+    "cascaded_biquad_filter.h",
+  ]
+  deps = [
+    "../../../api:array_view",
+    "../../../rtc_base:checks",
+  ]
+}
+
+rtc_library("legacy_delay_estimator") {
+  sources = [
+    "delay_estimator.cc",
+    "delay_estimator.h",
+    "delay_estimator_internal.h",
+    "delay_estimator_wrapper.cc",
+    "delay_estimator_wrapper.h",
+  ]
+  deps = [ "../../../rtc_base:checks" ]
+}
+
+rtc_library("pffft_wrapper") {
+  visibility = [ "../*" ]
+  sources = [
+    "pffft_wrapper.cc",
+    "pffft_wrapper.h",
+  ]
+  deps = [
+    "../../../api:array_view",
+    "../../../rtc_base:checks",
+    "//third_party/pffft",
+  ]
+}
+
+if (rtc_include_tests) {
+  rtc_library("cascaded_biquad_filter_unittest") {
+    testonly = true
+
+    sources = [ "cascaded_biquad_filter_unittest.cc" ]
+    deps = [
+      ":cascaded_biquad_filter",
+      "../../../test:test_support",
+      "//testing/gtest",
+    ]
+  }
+
+  rtc_library("legacy_delay_estimator_unittest") {
+    testonly = true
+
+    sources = [ "delay_estimator_unittest.cc" ]
+    deps = [
+      ":legacy_delay_estimator",
+      "../../../test:test_support",
+      "//testing/gtest",
+    ]
+  }
+
+  rtc_library("pffft_wrapper_unittest") {
+    testonly = true
+    sources = [ "pffft_wrapper_unittest.cc" ]
+    deps = [
+      ":pffft_wrapper",
+      "../../../test:test_support",
+      "//testing/gtest",
+      "//third_party/pffft",
+    ]
+  }
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/DEPS b/third_party/libwebrtc/modules/audio_processing/utility/DEPS
new file mode 100644
index 0000000000..c72d810b24
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/DEPS
@@ -0,0 +1,3 @@
+include_rules = [
+  "+third_party/pffft",
+]
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc
new file mode 100644
index 0000000000..0d236ce0be
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+CascadedBiQuadFilter::BiQuadParam::BiQuadParam(std::complex<float> zero,
+                                               std::complex<float> pole,
+                                               float gain,
+                                               bool mirror_zero_along_i_axis)
+    : zero(zero),
+      pole(pole),
+      gain(gain),
+      mirror_zero_along_i_axis(mirror_zero_along_i_axis) {}
+
+CascadedBiQuadFilter::BiQuadParam::BiQuadParam(const BiQuadParam&) = default;
+
+CascadedBiQuadFilter::BiQuad::BiQuad(
+    const CascadedBiQuadFilter::BiQuadParam& param)
+    : x(), y() {
+  float z_r = std::real(param.zero);
+  float z_i = std::imag(param.zero);
+  float p_r = std::real(param.pole);
+  float p_i = std::imag(param.pole);
+  float gain = param.gain;
+
+  if (param.mirror_zero_along_i_axis) {
+    // Assuming zeros at z_r and -z_r.
+    RTC_DCHECK(z_i == 0.f);
+    coefficients.b[0] = gain * 1.f;
+    coefficients.b[1] = 0.f;
+    coefficients.b[2] = gain * -(z_r * z_r);
+  } else {
+    // Assuming zeros at (z_r + z_i*i) and (z_r - z_i*i).
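+    // Expanding the numerator (1 - z0 * q) * (1 - conj(z0) * q) with q = z^-1
+    // gives 1 - 2 * Re(z0) * q + |z0|^2 * q^2, which (scaled by the gain) is
+    // exactly what is filled in below.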
+    coefficients.b[0] = gain * 1.f;
+    coefficients.b[1] = gain * -2.f * z_r;
+    coefficients.b[2] = gain * (z_r * z_r + z_i * z_i);
+  }
+
+  // Assuming poles at (p_r + p_i*i) and (p_r - p_i*i).
+  coefficients.a[0] = -2.f * p_r;
+  coefficients.a[1] = p_r * p_r + p_i * p_i;
+}
+
+void CascadedBiQuadFilter::BiQuad::Reset() {
+  x[0] = x[1] = y[0] = y[1] = 0.f;
+}
+
+CascadedBiQuadFilter::CascadedBiQuadFilter(
+    const CascadedBiQuadFilter::BiQuadCoefficients& coefficients,
+    size_t num_biquads)
+    : biquads_(num_biquads, BiQuad(coefficients)) {}
+
+CascadedBiQuadFilter::CascadedBiQuadFilter(
+    const std::vector<CascadedBiQuadFilter::BiQuadParam>& biquad_params) {
+  for (const auto& param : biquad_params) {
+    biquads_.push_back(BiQuad(param));
+  }
+}
+
+CascadedBiQuadFilter::~CascadedBiQuadFilter() = default;
+
+void CascadedBiQuadFilter::Process(rtc::ArrayView<const float> x,
+                                   rtc::ArrayView<float> y) {
+  if (biquads_.size() > 0) {
+    ApplyBiQuad(x, y, &biquads_[0]);
+    for (size_t k = 1; k < biquads_.size(); ++k) {
+      ApplyBiQuad(y, y, &biquads_[k]);
+    }
+  } else {
+    std::copy(x.begin(), x.end(), y.begin());
+  }
+}
+
+void CascadedBiQuadFilter::Process(rtc::ArrayView<float> y) {
+  for (auto& biquad : biquads_) {
+    ApplyBiQuad(y, y, &biquad);
+  }
+}
+
+void CascadedBiQuadFilter::Reset() {
+  for (auto& biquad : biquads_) {
+    biquad.Reset();
+  }
+}
+
+void CascadedBiQuadFilter::ApplyBiQuad(rtc::ArrayView<const float> x,
+                                       rtc::ArrayView<float> y,
+                                       CascadedBiQuadFilter::BiQuad* biquad) {
+  RTC_DCHECK_EQ(x.size(), y.size());
+  const float c_a_0 = biquad->coefficients.a[0];
+  const float c_a_1 = biquad->coefficients.a[1];
+  const float c_b_0 = biquad->coefficients.b[0];
+  const float c_b_1 = biquad->coefficients.b[1];
+  const float c_b_2 = biquad->coefficients.b[2];
+  float m_x_0 = biquad->x[0];
+  float m_x_1 = biquad->x[1];
+  float m_y_0 = biquad->y[0];
+  float m_y_1 = biquad->y[1];
+  for (size_t k = 0; k < x.size(); ++k) {
+    const float tmp = x[k];
+    y[k] = c_b_0 * tmp + c_b_1 * m_x_0 + c_b_2 * m_x_1 - c_a_0 * m_y_0 -
+           c_a_1 * m_y_1;
+    m_x_1 = m_x_0;
+    m_x_0 = tmp;
+    m_y_1 = m_y_0;
+    m_y_0 = y[k];
+  }
+  biquad->x[0] = m_x_0;
+  biquad->x[1] = m_x_1;
+  biquad->y[0] = m_y_0;
+  biquad->y[1] = m_y_1;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.h b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.h
new file mode 100644
index 0000000000..120b52aa57
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_CASCADED_BIQUAD_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_CASCADED_BIQUAD_FILTER_H_
+
+#include <stddef.h>
+
+#include <complex>
+#include <vector>
+
+#include "api/array_view.h"
+
+namespace webrtc {
+
+// Applies a number of biquads in a cascaded manner. The filter implementation
+// is direct form 1.
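+//
+// Each biquad realizes the direct form 1 difference equation
+//   y[n] = b[0] * x[n] + b[1] * x[n - 1] + b[2] * x[n - 2]
+//          - a[0] * y[n - 1] - a[1] * y[n - 2],
+// i.e. the a array stores the denominator taps a1 and a2 of a transfer
+// function whose leading denominator coefficient is normalized to 1.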
+class CascadedBiQuadFilter {
+ public:
+  struct BiQuadParam {
+    BiQuadParam(std::complex<float> zero,
+                std::complex<float> pole,
+                float gain,
+                bool mirror_zero_along_i_axis = false);
+    explicit BiQuadParam(const BiQuadParam&);
+    std::complex<float> zero;
+    std::complex<float> pole;
+    float gain;
+    bool mirror_zero_along_i_axis;
+  };
+
+  struct BiQuadCoefficients {
+    float b[3];
+    float a[2];
+  };
+
+  struct BiQuad {
+    explicit BiQuad(const BiQuadCoefficients& coefficients)
+        : coefficients(coefficients), x(), y() {}
+    explicit BiQuad(const CascadedBiQuadFilter::BiQuadParam& param);
+    void Reset();
+    BiQuadCoefficients coefficients;
+    float x[2];
+    float y[2];
+  };
+
+  CascadedBiQuadFilter(
+      const CascadedBiQuadFilter::BiQuadCoefficients& coefficients,
+      size_t num_biquads);
+  explicit CascadedBiQuadFilter(
+      const std::vector<CascadedBiQuadFilter::BiQuadParam>& biquad_params);
+  ~CascadedBiQuadFilter();
+  CascadedBiQuadFilter(const CascadedBiQuadFilter&) = delete;
+  CascadedBiQuadFilter& operator=(const CascadedBiQuadFilter&) = delete;
+
+  // Applies the biquads on the values in x in order to form the output in y.
+  void Process(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
+  // Applies the biquads on the values in y in an in-place manner.
+  void Process(rtc::ArrayView<float> y);
+  // Resets the filter to its initial state.
+  void Reset();
+
+ private:
+  void ApplyBiQuad(rtc::ArrayView<const float> x,
+                   rtc::ArrayView<float> y,
+                   CascadedBiQuadFilter::BiQuad* biquad);
+
+  std::vector<BiQuad> biquads_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_UTILITY_CASCADED_BIQUAD_FILTER_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_gn/moz.build
new file mode 100644
index 0000000000..8cf3aaefeb
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_gn/moz.build
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = 
True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "arm":
+
+    CXXFLAGS += [
+        "-mfpu=neon"
+    ]
+
+    DEFINES["WEBRTC_ARCH_ARM"] = True
+    DEFINES["WEBRTC_ARCH_ARM_V7"] = True
+    DEFINES["WEBRTC_HAS_NEON"] = True
+
+if CONFIG["CPU_ARCH"] == "mips32":
+
+    DEFINES["MIPS32_LE"] = True
+    DEFINES["MIPS_FPU_LE"] = True
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "mips64":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64":
+
+    DEFINES["WEBRTC_ENABLE_AVX2"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD":
+
+    DEFINES["_DEBUG"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT":
+
+    DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0"
+
+if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["USE_X11"] = "1"
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android":
+
+    OS_LIBS += [
+        "android_support",
+        "unwind"
+    ]
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android":
+
+    CXXFLAGS += [
+        "-msse2"
+    ]
+
+    OS_LIBS += [
+        "android_support"
+    ]
+
+if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux":
+
+    CXXFLAGS += [
+        "-msse2"
+    ]
+
+    DEFINES["_GNU_SOURCE"] = True
+
+if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux":
+
+    DEFINES["_GNU_SOURCE"] = True
+
+Library("cascaded_biquad_filter_gn")
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_unittest.cc
new file mode 100644
index 0000000000..ff7022dba4
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/cascaded_biquad_filter_unittest.cc
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
+
+#include <vector>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+// Coefficients for a second order Butterworth high-pass filter with cutoff
+// frequency 100 Hz.
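+// In the direct form 1 convention used by ApplyBiQuad(), the a array stores
+// {a1, a2} of the denominator 1 + a1 * z^-1 + a2 * z^-2, so the filter below
+// is H(z) = (0.97261 - 1.94523 * z^-1 + 0.97261 * z^-2) /
+//           (1 - 1.94448 * z^-1 + 0.94598 * z^-2).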
+const CascadedBiQuadFilter::BiQuadCoefficients kHighPassFilterCoefficients = {
+    {0.97261f, -1.94523f, 0.97261f},
+    {-1.94448f, 0.94598f}};
+
+const CascadedBiQuadFilter::BiQuadCoefficients kTransparentCoefficients = {
+    {1.f, 0.f, 0.f},
+    {0.f, 0.f}};
+
+const CascadedBiQuadFilter::BiQuadCoefficients kBlockingCoefficients = {
+    {0.f, 0.f, 0.f},
+    {0.f, 0.f}};
+
+std::vector<float> CreateInputWithIncreasingValues(size_t vector_length) {
+  std::vector<float> v(vector_length);
+  for (size_t k = 0; k < v.size(); ++k) {
+    v[k] = k;
+  }
+  return v;
+}
+
+}  // namespace
+
+// Verifies that the filter applies an effect which removes the input signal.
+// The test also verifies that the in-place Process API call works as intended.
+TEST(CascadedBiquadFilter, BlockingConfiguration) {
+  std::vector<float> values = CreateInputWithIncreasingValues(1000);
+
+  CascadedBiQuadFilter filter(kBlockingCoefficients, 1);
+  filter.Process(values);
+
+  EXPECT_EQ(std::vector<float>(1000, 0.f), values);
+}
+
+// Verifies that the filter is able to form a zero-mean output from a
+// non-zero-mean input signal when coefficients for a high-pass filter are
+// applied. The test also verifies that the filter works with multiple biquads.
+TEST(CascadedBiquadFilter, HighPassConfiguration) {
+  std::vector<float> values(1000);
+  for (size_t k = 0; k < values.size(); ++k) {
+    values[k] = 1.f;
+  }
+
+  CascadedBiQuadFilter filter(kHighPassFilterCoefficients, 2);
+  filter.Process(values);
+
+  for (size_t k = values.size() / 2; k < values.size(); ++k) {
+    EXPECT_NEAR(0.f, values[k], 1e-4);
+  }
+}
+
+// Verifies that the reset functionality works as intended.
+TEST(CascadedBiquadFilter, HighPassConfigurationResetFunctionality) {
+  CascadedBiQuadFilter filter(kHighPassFilterCoefficients, 2);
+
+  std::vector<float> values1(100, 1.f);
+  filter.Process(values1);
+
+  filter.Reset();
+
+  std::vector<float> values2(100, 1.f);
+  filter.Process(values2);
+
+  for (size_t k = 0; k < values1.size(); ++k) {
+    EXPECT_EQ(values1[k], values2[k]);
+  }
+}
+
+// Verifies that the filter is able to produce a transparent effect with no
+// impact on the data when the proper coefficients are applied. The test also
+// verifies that the non-in-place Process API call works as intended.
+TEST(CascadedBiquadFilter, TransparentConfiguration) {
+  const std::vector<float> input = CreateInputWithIncreasingValues(1000);
+  std::vector<float> output(input.size());
+
+  CascadedBiQuadFilter filter(kTransparentCoefficients, 1);
+  filter.Process(input, output);
+
+  EXPECT_EQ(input, output);
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Verifies that the check on the input and output lengths works for the
+// non-in-place call.
+TEST(CascadedBiquadFilterDeathTest, InputSizeCheckVerification) {
+  const std::vector<float> input = CreateInputWithIncreasingValues(10);
+  std::vector<float> output(input.size() - 1);
+
+  CascadedBiQuadFilter filter(kTransparentCoefficients, 1);
+  EXPECT_DEATH(filter.Process(input, output), "");
+}
+#endif
+
+// Verifies the conversion from zero, pole, gain to filter coefficients for a
+// lowpass filter.
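+// With the zero at -1 and pole p, the BiQuad constructor implies
+// b = gain * (1, 2, 1) and a = (-2 * Re(p), |p|^2), which is what the
+// EXPECT_NEAR values below check.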
+TEST(CascadedBiquadFilter, BiQuadParamLowPass) {
+  CascadedBiQuadFilter::BiQuadParam param(
+      {-1.0f, 0.0f}, {0.23146901f, 0.39514232f}, 0.1866943331163784f);
+  CascadedBiQuadFilter::BiQuad filter(param);
+  const float epsilon = 1e-6f;
+  EXPECT_NEAR(filter.coefficients.b[0], 0.18669433f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[1], 0.37338867f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[2], 0.18669433f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[0], -0.46293803f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[1], 0.20971536f, epsilon);
+}
+
+// Verifies the conversion from zero, pole, gain to filter coefficients for a
+// highpass filter.
+TEST(CascadedBiquadFilter, BiQuadParamHighPass) {
+  CascadedBiQuadFilter::BiQuadParam param(
+      {1.0f, 0.0f}, {0.72712179f, 0.21296904f}, 0.75707637533388494f);
+  CascadedBiQuadFilter::BiQuad filter(param);
+  const float epsilon = 1e-6f;
+  EXPECT_NEAR(filter.coefficients.b[0], 0.75707638f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[1], -1.51415275f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[2], 0.75707638f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[0], -1.45424359f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[1], 0.57406192f, epsilon);
+}
+
+// Verifies the conversion from zero, pole, gain to filter coefficients for a
+// bandpass filter.
+TEST(CascadedBiquadFilter, BiQuadParamBandPass) {
+  CascadedBiQuadFilter::BiQuadParam param(
+      {1.0f, 0.0f}, {1.11022302e-16f, 0.71381051f}, 0.2452372752527856f, true);
+  CascadedBiQuadFilter::BiQuad filter(param);
+  const float epsilon = 1e-6f;
+  EXPECT_NEAR(filter.coefficients.b[0], 0.24523728f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[1], 0.f, epsilon);
+  EXPECT_NEAR(filter.coefficients.b[2], -0.24523728f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[0], -2.22044605e-16f, epsilon);
+  EXPECT_NEAR(filter.coefficients.a[1], 5.09525449e-01f, epsilon);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc
new file mode 100644
index 0000000000..6868392f6f
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc
@@ -0,0 +1,708 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/delay_estimator.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+namespace {
+
+// The number of right shifts used for scaling depends linearly on the number
+// of bits in the far-end binary spectrum.
+static const int kShiftsAtZero = 13;  // Right shifts at zero binary spectrum.
+static const int kShiftsLinearSlope = 3;
+
+static const int32_t kProbabilityOffset = 1024;      // 2 in Q9.
+static const int32_t kProbabilityLowerLimit = 8704;  // 17 in Q9.
+static const int32_t kProbabilityMinSpread = 2816;   // 5.5 in Q9.
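+
+// The QN suffixes denote fixed-point values scaled by 2^N, so a Q9 constant
+// stores round(x * 512), e.g. 2 in Q9 is 1024, and a Q14 value is brought
+// back to Q0 by multiplying with 1.f / (1 << 14) (see kQ14Scaling below).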
+
+// Robust validation settings
+static const float kHistogramMax = 3000.f;
+static const float kLastHistogramMax = 250.f;
+static const float kMinHistogramThreshold = 1.5f;
+static const int kMinRequiredHits = 10;
+static const int kMaxHitsWhenPossiblyNonCausal = 10;
+static const int kMaxHitsWhenPossiblyCausal = 1000;
+static const float kQ14Scaling = 1.f / (1 << 14);  // Scaling by 2^14 to get Q0.
+static const float kFractionSlope = 0.05f;
+static const float kMinFractionWhenPossiblyCausal = 0.5f;
+static const float kMinFractionWhenPossiblyNonCausal = 0.25f;
+
+}  // namespace
+
+// Counts and returns the number of set bits in a 32-bit word.
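+// The masks below are octal: 033333333333 repeats binary 011 and 011111111111
+// repeats binary 001, so the two subtractions leave a per-3-bit-group bit
+// count in `tmp`; the remaining steps sum those groups and mask with 077
+// (63), which is enough since a 32-bit word holds at most 32 set bits.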
+static int BitCount(uint32_t u32) {
+  uint32_t tmp =
+      u32 - ((u32 >> 1) & 033333333333) - ((u32 >> 2) & 011111111111);
+  tmp = ((tmp + (tmp >> 3)) & 030707070707);
+  tmp = (tmp + (tmp >> 6));
+  tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077;
+
+  return ((int)tmp);
+}
+
+// Compares the `binary_vector` with all rows of the `binary_matrix` and counts
+// per row the number of times they have the same value.
+//
+// Inputs:
+//      - binary_vector     : binary "vector" stored in a long
+//      - binary_matrix     : binary "matrix" stored as a vector of long
+//      - matrix_size       : size of binary "matrix"
+//
+// Output:
+//      - bit_counts        : "Vector" stored as a long, containing for each
+//                            row the number of times the matrix row and the
+//                            input vector have the same value
+//
+static void BitCountComparison(uint32_t binary_vector,
+                               const uint32_t* binary_matrix,
+                               int matrix_size,
+                               int32_t* bit_counts) {
+  int n = 0;
+
+  // Compare `binary_vector` with all rows of the `binary_matrix`
+  for (; n < matrix_size; n++) {
+    bit_counts[n] = (int32_t)BitCount(binary_vector ^ binary_matrix[n]);
+  }
+}
+
+// Collects necessary statistics for the HistogramBasedValidation(). This
+// function has to be called prior to calling HistogramBasedValidation(). The
+// statistics updated and used by the HistogramBasedValidation() are:
+// 1. the number of `candidate_hits`, which states how long we have had the
+//    same `candidate_delay`
+// 2. the `histogram` of candidate delays over time. This histogram is
+//    weighted with respect to a reliability measure and time-varying to cope
+//    with possible delay shifts.
+// For further description see commented code.
+//
+// Inputs:
+//  - candidate_delay   : The delay to validate.
+//  - valley_depth_q14  : The cost function has a valley/minimum at the
+//                        `candidate_delay` location. `valley_depth_q14` is
+//                        the cost function difference between the minimum and
+//                        maximum locations. The value is in the Q14 domain.
+//  - valley_level_q14  : The cost function value at the minimum, in Q14.
+static void UpdateRobustValidationStatistics(BinaryDelayEstimator* self,
+                                             int candidate_delay,
+                                             int32_t valley_depth_q14,
+                                             int32_t valley_level_q14) {
+  const float valley_depth = valley_depth_q14 * kQ14Scaling;
+  float decrease_in_last_set = valley_depth;
+  const int max_hits_for_slow_change = (candidate_delay < self->last_delay)
+                                           ? kMaxHitsWhenPossiblyNonCausal
+                                           : kMaxHitsWhenPossiblyCausal;
+  int i = 0;
+
+  RTC_DCHECK_EQ(self->history_size, self->farend->history_size);
+  // Reset `candidate_hits` if we have a new candidate.
+  if (candidate_delay != self->last_candidate_delay) {
+    self->candidate_hits = 0;
+    self->last_candidate_delay = candidate_delay;
+  }
+  self->candidate_hits++;
+
+  // The `histogram` is updated differently across the bins.
+  // 1. The `candidate_delay` histogram bin is increased with the
+  //    `valley_depth`, which is a simple measure of how reliable the
+  //    `candidate_delay` is. The histogram is not increased above
+  //    `kHistogramMax`.
+  self->histogram[candidate_delay] += valley_depth;
+  if (self->histogram[candidate_delay] > kHistogramMax) {
+    self->histogram[candidate_delay] = kHistogramMax;
+  }
+  // 2. The histogram bins in the neighborhood of `candidate_delay` are
+  //    unaffected. The neighborhood is defined as x + {-2, -1, 0, 1}.
+  // 3. The histogram bins in the neighborhood of `last_delay` are decreased
+  //    with `decrease_in_last_set`. This value equals the difference between
+  //    the cost function values at the locations `candidate_delay` and
+  //    `last_delay` until we reach `max_hits_for_slow_change` consecutive
+  //    hits at the `candidate_delay`. If we exceed this amount of hits the
+  //    `candidate_delay` is a "potential" candidate and we start decreasing
+  //    these histogram bins more rapidly with `valley_depth`.
+  if (self->candidate_hits < max_hits_for_slow_change) {
+    decrease_in_last_set =
+        (self->mean_bit_counts[self->compare_delay] - valley_level_q14) *
+        kQ14Scaling;
+  }
+  // 4. All other bins are decreased with `valley_depth`.
+  // TODO(bjornv): Investigate how to make this loop more efficient. Split up
+  // the loop? Remove parts that don't add too much.
+  for (i = 0; i < self->history_size; ++i) {
+    int is_in_last_set = (i >= self->last_delay - 2) &&
+                         (i <= self->last_delay + 1) && (i != candidate_delay);
+    int is_in_candidate_set =
+        (i >= candidate_delay - 2) && (i <= candidate_delay + 1);
+    self->histogram[i] -=
+        decrease_in_last_set * is_in_last_set +
+        valley_depth * (!is_in_last_set && !is_in_candidate_set);
+    // 5. No histogram bin can go below 0.
+    if (self->histogram[i] < 0) {
+      self->histogram[i] = 0;
+    }
+  }
+}
+
+// Validates the `candidate_delay`, estimated in
+// WebRtc_ProcessBinarySpectrum(), based on a mix of counting concurring hits
+// with a modified histogram of recent delay estimates. In brief a candidate
+// is valid (returns 1) if it is the most likely according to the histogram.
+// There are a couple of exceptions that are worth mentioning:
+// 1. If the `candidate_delay` < `last_delay` it can be that we are in a
+//    non-causal state, breaking a possible echo control algorithm. Hence, we
+//    open up for a quicker change by allowing the change even if the
+//    `candidate_delay` is not the most likely one according to the histogram.
+// 2. There's a minimum number of hits (kMinRequiredHits) and the histogram
+//    value has to have reached a minimum (kMinHistogramThreshold) to be
+//    valid.
+// 3. The action also depends on the filter length used for echo control. If
+//    the delay difference is larger than what the filter can capture, we also
+//    move quicker towards a change.
+// For further description see commented code.
+//
+// Input:
+//  - candidate_delay     : The delay to validate.
+//
+// Return value:
+//  - is_histogram_valid  : 1 - The `candidate_delay` is valid.
+//                          0 - Otherwise.
+static int HistogramBasedValidation(const BinaryDelayEstimator* self,
+                                    int candidate_delay) {
+  float fraction = 1.f;
+  float histogram_threshold = self->histogram[self->compare_delay];
+  const int delay_difference = candidate_delay - self->last_delay;
+  int is_histogram_valid = 0;
+
+  // The histogram based validation of `candidate_delay` is done by comparing
+  // the `histogram` at bin `candidate_delay` with a `histogram_threshold`.
+ // This `histogram_threshold` equals a `fraction` of the `histogram` at bin + // `last_delay`. The `fraction` is a piecewise linear function of the + // `delay_difference` between the `candidate_delay` and the `last_delay` + // allowing for a quicker move if + // i) a potential echo control filter can not handle these large differences. + // ii) keeping `last_delay` instead of updating to `candidate_delay` could + // force an echo control into a non-causal state. + // We further require the histogram to have reached a minimum value of + // `kMinHistogramThreshold`. In addition, we also require the number of + // `candidate_hits` to be more than `kMinRequiredHits` to remove spurious + // values. + + // Calculate a comparison histogram value (`histogram_threshold`) that is + // depending on the distance between the `candidate_delay` and `last_delay`. + // TODO(bjornv): How much can we gain by turning the fraction calculation + // into tables? + if (delay_difference > self->allowed_offset) { + fraction = 1.f - kFractionSlope * (delay_difference - self->allowed_offset); + fraction = (fraction > kMinFractionWhenPossiblyCausal + ? fraction + : kMinFractionWhenPossiblyCausal); + } else if (delay_difference < 0) { + fraction = + kMinFractionWhenPossiblyNonCausal - kFractionSlope * delay_difference; + fraction = (fraction > 1.f ? 1.f : fraction); + } + histogram_threshold *= fraction; + histogram_threshold = + (histogram_threshold > kMinHistogramThreshold ? histogram_threshold + : kMinHistogramThreshold); + + is_histogram_valid = + (self->histogram[candidate_delay] >= histogram_threshold) && + (self->candidate_hits > kMinRequiredHits); + + return is_histogram_valid; +} + +// Performs a robust validation of the `candidate_delay` estimated in +// WebRtc_ProcessBinarySpectrum(). The algorithm takes the +// `is_instantaneous_valid` and the `is_histogram_valid` and combines them +// into a robust validation. The HistogramBasedValidation() has to be called +// prior to this call. +// For further description on how the combination is done, see commented code. +// +// Inputs: +// - candidate_delay : The delay to validate. +// - is_instantaneous_valid : The instantaneous validation performed in +// WebRtc_ProcessBinarySpectrum(). +// - is_histogram_valid : The histogram based validation. +// +// Return value: +// - is_robust : 1 - The candidate_delay is valid according to a +// combination of the two inputs. +// : 0 - Otherwise. +static int RobustValidation(const BinaryDelayEstimator* self, + int candidate_delay, + int is_instantaneous_valid, + int is_histogram_valid) { + int is_robust = 0; + + // The final robust validation is based on the two algorithms; 1) the + // `is_instantaneous_valid` and 2) the histogram based with result stored in + // `is_histogram_valid`. + // i) Before we actually have a valid estimate (`last_delay` == -2), we say + // a candidate is valid if either algorithm states so + // (`is_instantaneous_valid` OR `is_histogram_valid`). + is_robust = + (self->last_delay < 0) && (is_instantaneous_valid || is_histogram_valid); + // ii) Otherwise, we need both algorithms to be certain + // (`is_instantaneous_valid` AND `is_histogram_valid`) + is_robust |= is_instantaneous_valid && is_histogram_valid; + // iii) With one exception, i.e., the histogram based algorithm can overrule + // the instantaneous one if `is_histogram_valid` = 1 and the histogram + // is significantly strong. 
+  is_robust |= is_histogram_valid &&
+               (self->histogram[candidate_delay] > self->last_delay_histogram);
+
+  return is_robust;
+}
+
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+  if (self == NULL) {
+    return;
+  }
+
+  free(self->binary_far_history);
+  self->binary_far_history = NULL;
+
+  free(self->far_bit_counts);
+  self->far_bit_counts = NULL;
+
+  free(self);
+}
+
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+    int history_size) {
+  BinaryDelayEstimatorFarend* self = NULL;
+
+  if (history_size > 1) {
+    // Sanity conditions fulfilled.
+    self = static_cast<BinaryDelayEstimatorFarend*>(
+        malloc(sizeof(BinaryDelayEstimatorFarend)));
+  }
+  if (self == NULL) {
+    return NULL;
+  }
+
+  self->history_size = 0;
+  self->binary_far_history = NULL;
+  self->far_bit_counts = NULL;
+  if (WebRtc_AllocateFarendBufferMemory(self, history_size) == 0) {
+    WebRtc_FreeBinaryDelayEstimatorFarend(self);
+    self = NULL;
+  }
+  return self;
+}
+
+int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self,
+                                      int history_size) {
+  RTC_DCHECK(self);
+  // (Re-)Allocate memory for history buffers.
+  self->binary_far_history = static_cast<uint32_t*>(
+      realloc(self->binary_far_history,
+              history_size * sizeof(*self->binary_far_history)));
+  self->far_bit_counts = static_cast<int*>(realloc(
+      self->far_bit_counts, history_size * sizeof(*self->far_bit_counts)));
+  if ((self->binary_far_history == NULL) || (self->far_bit_counts == NULL)) {
+    history_size = 0;
+  }
+  // Fill with zeros if we have expanded the buffers.
+  if (history_size > self->history_size) {
+    int size_diff = history_size - self->history_size;
+    memset(&self->binary_far_history[self->history_size], 0,
+           sizeof(*self->binary_far_history) * size_diff);
+    memset(&self->far_bit_counts[self->history_size], 0,
+           sizeof(*self->far_bit_counts) * size_diff);
+  }
+  self->history_size = history_size;
+
+  return self->history_size;
+}
+
+void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+  RTC_DCHECK(self);
+  memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size);
+  memset(self->far_bit_counts, 0, sizeof(int) * self->history_size);
+}
+
+void WebRtc_SoftResetBinaryDelayEstimatorFarend(
+    BinaryDelayEstimatorFarend* self,
+    int delay_shift) {
+  int abs_shift = abs(delay_shift);
+  int shift_size = 0;
+  int dest_index = 0;
+  int src_index = 0;
+  int padding_index = 0;
+
+  RTC_DCHECK(self);
+  shift_size = self->history_size - abs_shift;
+  RTC_DCHECK_GT(shift_size, 0);
+  if (delay_shift == 0) {
+    return;
+  } else if (delay_shift > 0) {
+    dest_index = abs_shift;
+  } else if (delay_shift < 0) {
+    src_index = abs_shift;
+    padding_index = shift_size;
+  }
+
+  // Shift and zero pad buffers.
+  memmove(&self->binary_far_history[dest_index],
+          &self->binary_far_history[src_index],
+          sizeof(*self->binary_far_history) * shift_size);
+  memset(&self->binary_far_history[padding_index], 0,
+         sizeof(*self->binary_far_history) * abs_shift);
+  memmove(&self->far_bit_counts[dest_index], &self->far_bit_counts[src_index],
+          sizeof(*self->far_bit_counts) * shift_size);
+  memset(&self->far_bit_counts[padding_index], 0,
+         sizeof(*self->far_bit_counts) * abs_shift);
+}
+
+void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* handle,
+                                 uint32_t binary_far_spectrum) {
+  RTC_DCHECK(handle);
+  // Shift binary spectrum history and insert current `binary_far_spectrum`.
+  memmove(&(handle->binary_far_history[1]), &(handle->binary_far_history[0]),
+          (handle->history_size - 1) * sizeof(uint32_t));
+  handle->binary_far_history[0] = binary_far_spectrum;
+
+  // Shift history of far-end binary spectrum bit counts and insert bit count
+  // of current `binary_far_spectrum`.
+  memmove(&(handle->far_bit_counts[1]), &(handle->far_bit_counts[0]),
+          (handle->history_size - 1) * sizeof(int));
+  handle->far_bit_counts[0] = BitCount(binary_far_spectrum);
+}
+
+void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self) {
+  if (self == NULL) {
+    return;
+  }
+
+  free(self->mean_bit_counts);
+  self->mean_bit_counts = NULL;
+
+  free(self->bit_counts);
+  self->bit_counts = NULL;
+
+  free(self->binary_near_history);
+  self->binary_near_history = NULL;
+
+  free(self->histogram);
+  self->histogram = NULL;
+
+  // BinaryDelayEstimator does not have ownership of `farend`, hence we do not
+  // free the memory here. That should be handled separately by the user.
+  self->farend = NULL;
+
+  free(self);
+}
+
+BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator(
+    BinaryDelayEstimatorFarend* farend,
+    int max_lookahead) {
+  BinaryDelayEstimator* self = NULL;
+
+  if ((farend != NULL) && (max_lookahead >= 0)) {
+    // Sanity conditions fulfilled.
+    self = static_cast<BinaryDelayEstimator*>(
+        malloc(sizeof(BinaryDelayEstimator)));
+  }
+  if (self == NULL) {
+    return NULL;
+  }
+
+  self->farend = farend;
+  self->near_history_size = max_lookahead + 1;
+  self->history_size = 0;
+  self->robust_validation_enabled = 0;  // Disabled by default.
+  self->allowed_offset = 0;
+
+  self->lookahead = max_lookahead;
+
+  // Allocate memory for spectrum and history buffers.
+  self->mean_bit_counts = NULL;
+  self->bit_counts = NULL;
+  self->histogram = NULL;
+  self->binary_near_history = static_cast<uint32_t*>(
+      malloc((max_lookahead + 1) * sizeof(*self->binary_near_history)));
+  if (self->binary_near_history == NULL ||
+      WebRtc_AllocateHistoryBufferMemory(self, farend->history_size) == 0) {
+    WebRtc_FreeBinaryDelayEstimator(self);
+    self = NULL;
+  }
+
+  return self;
+}
+
+int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self,
+                                       int history_size) {
+  BinaryDelayEstimatorFarend* far = self->farend;
+  // (Re-)Allocate memory for spectrum and history buffers.
+  if (history_size != far->history_size) {
+    // Only update the far-end buffers if needed.
+    history_size = WebRtc_AllocateFarendBufferMemory(far, history_size);
+  }
+  // The extra array element in `mean_bit_counts` and `histogram` is a dummy
+  // element only used while `last_delay` == -2, i.e., before we have a valid
+  // estimate.
+  self->mean_bit_counts = static_cast<int32_t*>(
+      realloc(self->mean_bit_counts,
+              (history_size + 1) * sizeof(*self->mean_bit_counts)));
+  self->bit_counts = static_cast<int32_t*>(
+      realloc(self->bit_counts, history_size * sizeof(*self->bit_counts)));
+  self->histogram = static_cast<float*>(
+      realloc(self->histogram, (history_size + 1) * sizeof(*self->histogram)));
+
+  if ((self->mean_bit_counts == NULL) || (self->bit_counts == NULL) ||
+      (self->histogram == NULL)) {
+    history_size = 0;
+  }
+  // Fill with zeros if we have expanded the buffers.
+  if (history_size > self->history_size) {
+    int size_diff = history_size - self->history_size;
+    memset(&self->mean_bit_counts[self->history_size], 0,
+           sizeof(*self->mean_bit_counts) * size_diff);
+    memset(&self->bit_counts[self->history_size], 0,
+           sizeof(*self->bit_counts) * size_diff);
+    memset(&self->histogram[self->history_size], 0,
+           sizeof(*self->histogram) * size_diff);
+  }
+  self->history_size = history_size;
+
+  return self->history_size;
+}
+
+void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) {
+  int i = 0;
+  RTC_DCHECK(self);
+
+  memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size);
+  memset(self->binary_near_history, 0,
+         sizeof(uint32_t) * self->near_history_size);
+  for (i = 0; i <= self->history_size; ++i) {
+    self->mean_bit_counts[i] = (20 << 9);  // 20 in Q9.
+    self->histogram[i] = 0.f;
+  }
+  self->minimum_probability = kMaxBitCountsQ9;          // 32 in Q9.
+  self->last_delay_probability = (int)kMaxBitCountsQ9;  // 32 in Q9.
+
+  // Default return value if we're unable to estimate. -1 is used for errors.
+  self->last_delay = -2;
+
+  self->last_candidate_delay = -2;
+  self->compare_delay = self->history_size;
+  self->candidate_hits = 0;
+  self->last_delay_histogram = 0.f;
+}
+
+int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self,
+                                         int delay_shift) {
+  int lookahead = 0;
+  RTC_DCHECK(self);
+  lookahead = self->lookahead;
+  self->lookahead -= delay_shift;
+  if (self->lookahead < 0) {
+    self->lookahead = 0;
+  }
+  if (self->lookahead > self->near_history_size - 1) {
+    self->lookahead = self->near_history_size - 1;
+  }
+  return lookahead - self->lookahead;
+}
+
+int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
+                                 uint32_t binary_near_spectrum) {
+  int i = 0;
+  int candidate_delay = -1;
+  int valid_candidate = 0;
+
+  int32_t value_best_candidate = kMaxBitCountsQ9;
+  int32_t value_worst_candidate = 0;
+  int32_t valley_depth = 0;
+
+  RTC_DCHECK(self);
+  if (self->farend->history_size != self->history_size) {
+    // Non-matching history sizes.
+    return -1;
+  }
+  if (self->near_history_size > 1) {
+    // If we apply lookahead, shift the near-end binary spectrum history.
+    // Insert the current `binary_near_spectrum` and pull out the delayed one.
+    memmove(&(self->binary_near_history[1]), &(self->binary_near_history[0]),
+            (self->near_history_size - 1) * sizeof(uint32_t));
+    self->binary_near_history[0] = binary_near_spectrum;
+    binary_near_spectrum = self->binary_near_history[self->lookahead];
+  }
+
+  // Compare with delayed spectra and store the `bit_counts` for each delay.
+  BitCountComparison(binary_near_spectrum, self->farend->binary_far_history,
+                     self->history_size, self->bit_counts);
+
+  // Update `mean_bit_counts`, which is the smoothed version of `bit_counts`.
+  for (i = 0; i < self->history_size; i++) {
+    // `bit_counts` is constrained to [0, 32], meaning we can smooth with a
+    // factor up to 2^26. We use Q9.
+    int32_t bit_count = (self->bit_counts[i] << 9);  // Q9.
+
+    // Update `mean_bit_counts` only when the far-end signal has something to
+    // contribute. If `far_bit_counts` is zero the far-end signal is weak and
+    // we likely have a poor echo condition, hence we don't update.
+    if (self->farend->far_bit_counts[i] > 0) {
+      // Make the number of right shifts piecewise linear w.r.t.
+      // `far_bit_counts`.
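+      // The stronger the far-end signal (higher `far_bit_counts`), the fewer
+      // right shifts are applied, so the mean adapts faster; a weak far-end
+      // yields more shifts and hence slower, more conservative smoothing.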
+      int shifts = kShiftsAtZero;
+      shifts -= (kShiftsLinearSlope * self->farend->far_bit_counts[i]) >> 4;
+      WebRtc_MeanEstimatorFix(bit_count, shifts, &(self->mean_bit_counts[i]));
+    }
+  }
+
+  // Find `candidate_delay`, `value_best_candidate` and `value_worst_candidate`
+  // of `mean_bit_counts`.
+  for (i = 0; i < self->history_size; i++) {
+    if (self->mean_bit_counts[i] < value_best_candidate) {
+      value_best_candidate = self->mean_bit_counts[i];
+      candidate_delay = i;
+    }
+    if (self->mean_bit_counts[i] > value_worst_candidate) {
+      value_worst_candidate = self->mean_bit_counts[i];
+    }
+  }
+  valley_depth = value_worst_candidate - value_best_candidate;
+
+  // The `value_best_candidate` is a good indicator of the probability of
+  // `candidate_delay` being an accurate delay (a small `value_best_candidate`
+  // means a good binary match). In the following sections we make a decision
+  // whether to update `last_delay` or not.
+  // 1) If the difference in bit counts between the best and the worst delay
+  //    candidates is too small we consider the situation to be unreliable and
+  //    don't update `last_delay`.
+  // 2) If the situation is reliable we update `last_delay` if the best
+  //    candidate delay has a value less than
+  //    i)  an adaptive threshold `minimum_probability`, or
+  //    ii) the corresponding value `last_delay_probability`, as updated at
+  //        this time instant.
+
+  // Update `minimum_probability`.
+  if ((self->minimum_probability > kProbabilityLowerLimit) &&
+      (valley_depth > kProbabilityMinSpread)) {
+    // The "hard" threshold can't be lower than 17 (in Q9).
+    // The valley in the curve also has to be distinct, i.e., the
+    // difference between `value_worst_candidate` and `value_best_candidate`
+    // has to be large enough.
+    int32_t threshold = value_best_candidate + kProbabilityOffset;
+    if (threshold < kProbabilityLowerLimit) {
+      threshold = kProbabilityLowerLimit;
+    }
+    if (self->minimum_probability > threshold) {
+      self->minimum_probability = threshold;
+    }
+  }
+  // Update `last_delay_probability`.
+  // We use a Markov type model, i.e., a slowly increasing level over time.
+  self->last_delay_probability++;
+  // Validate `candidate_delay`. We have a reliable instantaneous delay
+  // estimate if
+  //  1) The valley is distinct enough (`valley_depth` > `kProbabilityOffset`)
+  //     and
+  //  2) The depth of the valley is deep enough
+  //     (`value_best_candidate` < `minimum_probability`)
+  //     and deeper than the best estimate so far
+  //     (`value_best_candidate` < `last_delay_probability`)
+  valid_candidate = ((valley_depth > kProbabilityOffset) &&
+                     ((value_best_candidate < self->minimum_probability) ||
+                      (value_best_candidate < self->last_delay_probability)));
+
+  // Check for a nonstationary farend signal.
+  const bool non_stationary_farend =
+      std::any_of(self->farend->far_bit_counts,
+                  self->farend->far_bit_counts + self->history_size,
+                  [](int a) { return a > 0; });
+
+  if (non_stationary_farend) {
+    // Only update the validation statistics when the farend is nonstationary
+    // as the underlying estimates are otherwise frozen.
+    UpdateRobustValidationStatistics(self, candidate_delay, valley_depth,
+                                     value_best_candidate);
+  }
+
+  if (self->robust_validation_enabled) {
+    int is_histogram_valid = HistogramBasedValidation(self, candidate_delay);
+    valid_candidate = RobustValidation(self, candidate_delay, valid_candidate,
+                                       is_histogram_valid);
+  }
+
+  // Only update the delay estimate when the farend is nonstationary and when
+  // a valid delay candidate is available.
+  if (non_stationary_farend && valid_candidate) {
+    if (candidate_delay != self->last_delay) {
+      self->last_delay_histogram =
+          (self->histogram[candidate_delay] > kLastHistogramMax
+               ? kLastHistogramMax
+               : self->histogram[candidate_delay]);
+      // Adjust the histogram if we made a change to `last_delay`, though it
+      // was not the most likely one according to the histogram.
+      if (self->histogram[candidate_delay] <
+          self->histogram[self->compare_delay]) {
+        self->histogram[self->compare_delay] = self->histogram[candidate_delay];
+      }
+    }
+    self->last_delay = candidate_delay;
+    if (value_best_candidate < self->last_delay_probability) {
+      self->last_delay_probability = value_best_candidate;
+    }
+    self->compare_delay = self->last_delay;
+  }
+
+  return self->last_delay;
+}
+
+int WebRtc_binary_last_delay(BinaryDelayEstimator* self) {
+  RTC_DCHECK(self);
+  return self->last_delay;
+}
+
+float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self) {
+  float quality = 0;
+  RTC_DCHECK(self);
+
+  if (self->robust_validation_enabled) {
+    // Simply a linear function of the histogram height at the delay estimate.
+    quality = self->histogram[self->compare_delay] / kHistogramMax;
+  } else {
+    // Note that `last_delay_probability` states how deep the minimum of the
+    // cost function is, so it is rather an error probability.
+    quality = (float)(kMaxBitCountsQ9 - self->last_delay_probability) /
+              kMaxBitCountsQ9;
+    if (quality < 0) {
+      quality = 0;
+    }
+  }
+  return quality;
+}
+
+void WebRtc_MeanEstimatorFix(int32_t new_value,
+                             int factor,
+                             int32_t* mean_value) {
+  int32_t diff = new_value - *mean_value;
+
+  // mean_new = mean_value + ((new_value - mean_value) >> factor);
+  if (diff < 0) {
+    diff = -((-diff) >> factor);
+  } else {
+    diff = (diff >> factor);
+  }
+  *mean_value += diff;
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.h b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.h
new file mode 100644
index 0000000000..b6fc36a759
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.h
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Performs delay estimation on binary converted spectra.
+// The return value is 0 - OK and -1 - Error, unless otherwise stated.
+
+#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
+
+#include <stdint.h>
+
+namespace webrtc {
+
+static const int32_t kMaxBitCountsQ9 = (32 << 9);  // 32 matching bits in Q9.
+
+typedef struct {
+  // Pointer to bit counts.
+  int* far_bit_counts;
+  // Binary history variables.
+  uint32_t* binary_far_history;
+  int history_size;
+} BinaryDelayEstimatorFarend;
+
+typedef struct {
+  // Pointer to bit counts.
+  int32_t* mean_bit_counts;
+  // Array only used locally in ProcessBinarySpectrum() but whose size is
+  // determined at run-time.
+  int32_t* bit_counts;
+
+  // Binary history variables.
+  uint32_t* binary_near_history;
+  int near_history_size;
+  int history_size;
+
+  // Delay estimation variables.
+  int32_t minimum_probability;
+  int last_delay_probability;
+
+  // Delay memory.
+  int last_delay;
+
+  // Robust validation.
+  int robust_validation_enabled;
+  int allowed_offset;
+  int last_candidate_delay;
+  int compare_delay;
+  int candidate_hits;
+  float* histogram;
+  float last_delay_histogram;
+
+  // For dynamically changing the lookahead when using SoftReset...().
+  int lookahead;
+
+  // Far-end binary spectrum history buffer etc.
+  BinaryDelayEstimatorFarend* farend;
+} BinaryDelayEstimator;
+
+// Releases the memory allocated by
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+// Input:
+//    - self          : Pointer to the binary delay estimation far-end
+//                      instance which is the return value of
+//                      WebRtc_CreateBinaryDelayEstimatorFarend().
+//
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self);
+
+// Allocates the memory needed by the far-end part of the binary delay
+// estimation. The memory needs to be initialized separately through
+// WebRtc_InitBinaryDelayEstimatorFarend(...).
+//
+// Inputs:
+//    - history_size  : Size of the far-end binary spectrum history.
+//
+// Return value:
+//    - BinaryDelayEstimatorFarend*
+//                    : Created `handle`. If the memory can't be allocated
+//                      or if any of the input parameters are invalid, NULL
+//                      is returned.
+//
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+    int history_size);
+
+// Re-allocates the buffers.
+//
+// Inputs:
+//    - self          : Pointer to the binary estimation far-end instance
+//                      which is the return value of
+//                      WebRtc_CreateBinaryDelayEstimatorFarend().
+//    - history_size  : Size of the far-end binary spectrum history.
+//
+// Return value:
+//    - history_size  : The history size allocated.
+int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self,
+                                      int history_size);
+
+// Initializes the delay estimation far-end instance created with
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+//
+// Input:
+//    - self          : Pointer to the delay estimation far-end instance.
+//
+// Output:
+//    - self          : Initialized far-end instance.
+//
+void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self);
+
+// Soft resets the delay estimation far-end instance created with
+// WebRtc_CreateBinaryDelayEstimatorFarend(...).
+//
+// Input:
+//    - delay_shift   : The number of blocks to shift the history buffers.
+//
+void WebRtc_SoftResetBinaryDelayEstimatorFarend(
+    BinaryDelayEstimatorFarend* self,
+    int delay_shift);
+
+// Adds the binary far-end spectrum to the internal far-end history buffer.
+// This spectrum is used as a reference when calculating the delay using
+// WebRtc_ProcessBinarySpectrum().
+//
+// Inputs:
+//    - self                  : Pointer to the delay estimation far-end
+//                              instance.
+//    - binary_far_spectrum   : Far-end binary spectrum.
+//
+// Output:
+//    - self                  : Updated far-end instance.
+//
+void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* self,
+                                 uint32_t binary_far_spectrum);
+
+// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Note that BinaryDelayEstimator utilizes BinaryDelayEstimatorFarend, but does
+// not take ownership of it, hence the BinaryDelayEstimator has to be torn down
+// before the far-end.
+//
+// Input:
+//    - self          : Pointer to the binary delay estimation instance
+//                      which is the return value of
+//                      WebRtc_CreateBinaryDelayEstimator().
+//
+void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self);
+
+// Allocates the memory needed by the binary delay estimation. The memory needs
+// to be initialized separately through WebRtc_InitBinaryDelayEstimator(...).
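+//
+// Illustrative call sequence (a non-normative sketch; the history size and
+// lookahead values below are arbitrary examples):
+//
+//   BinaryDelayEstimatorFarend* farend =
+//       WebRtc_CreateBinaryDelayEstimatorFarend(/*history_size=*/100);
+//   BinaryDelayEstimator* estimator =
+//       WebRtc_CreateBinaryDelayEstimator(farend, /*max_lookahead=*/10);
+//   WebRtc_InitBinaryDelayEstimatorFarend(farend);
+//   WebRtc_InitBinaryDelayEstimator(estimator);
+//   // Per block: add the far-end spectrum first, then process the near-end.
+//   WebRtc_AddBinaryFarSpectrum(farend, far_spectrum);
+//   int delay = WebRtc_ProcessBinarySpectrum(estimator, near_spectrum);
+//   // Tear down the estimator before the far-end (no ownership transfer).
+//   WebRtc_FreeBinaryDelayEstimator(estimator);
+//   WebRtc_FreeBinaryDelayEstimatorFarend(farend);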
+//
+// See WebRtc_CreateDelayEstimator(..) in delay_estimator_wrapper.cc for a
+// detailed description.
BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator(
+    BinaryDelayEstimatorFarend* farend,
+    int max_lookahead);
+
+// Re-allocates `history_size` dependent buffers. The far-end buffers will be
+// updated at the same time if needed.
+//
+// Input:
+//    - self          : Pointer to the binary estimation instance which is
+//                      the return value of
+//                      WebRtc_CreateBinaryDelayEstimator().
+//    - history_size  : Size of the history buffers.
+//
+// Return value:
+//    - history_size  : The history size allocated.
int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self,
+                                       int history_size);
+
+// Initializes the delay estimation instance created with
+// WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Input:
+//    - self          : Pointer to the delay estimation instance.
+//
+// Output:
+//    - self          : Initialized instance.
+//
+void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self);
+
+// Soft resets the delay estimation instance created with
+// WebRtc_CreateBinaryDelayEstimator(...).
+//
+// Input:
+//    - delay_shift   : The number of blocks to shift the history buffers.
+//
+// Return value:
+//    - actual_shifts : The actual number of shifts performed.
+//
+int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self,
+                                         int delay_shift);
+
+// Estimates and returns the delay between the binary far-end and binary
+// near-end spectra. It is assumed the binary far-end spectrum has been added
+// using WebRtc_AddBinaryFarSpectrum() prior to this call. The value will be
+// offset by the lookahead (i.e. the lookahead should be subtracted from the
+// returned value).
+//
+// Inputs:
+//    - self                  : Pointer to the delay estimation instance.
+//    - binary_near_spectrum  : Near-end binary spectrum of the current block.
+//
+// Output:
+//    - self                  : Updated instance.
+//
+// Return value:
+//    - delay                 : >= 0 - Calculated delay value.
+//                              -2   - Insufficient data for estimation.
+//
+int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self,
+                                 uint32_t binary_near_spectrum);
+
+// Returns the last calculated delay updated by the function
+// WebRtc_ProcessBinarySpectrum(...).
+//
+// Input:
+//    - self          : Pointer to the delay estimation instance.
+//
+// Return value:
+//    - delay         : >= 0 - Last calculated delay value.
+//                      -2   - Insufficient data for estimation.
+//
+int WebRtc_binary_last_delay(BinaryDelayEstimator* self);
+
+// Returns the estimation quality of the last calculated delay updated by the
+// function WebRtc_ProcessBinarySpectrum(...). The estimation quality is a
+// value in the interval [0, 1]. The higher the value, the better the quality.
+//
+// Return value:
+//    - delay_quality : >= 0 - Estimation quality of last calculated
+//                             delay value.
float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self);
+
+// Updates the `mean_value` recursively with a step size of 2^-`factor`. This
+// function is used internally in the Binary Delay Estimator as well as the
+// fixed point wrapper.
+//
+// Inputs:
+//    - new_value     : The new value the mean should be updated with.
+//    - factor        : The step size, in number of right shifts.
+//
+// Input/Output:
+//    - mean_value    : Pointer to the mean value.
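+//
+// Worked example (illustrative values only): with factor = 2, *mean_value =
+// 100 and new_value = 180, diff = 80 >> 2 = 20, so the mean becomes 120,
+// i.e. it moves 2^-2 of the way toward the new value.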
+// +void WebRtc_MeanEstimatorFix(int32_t new_value, + int factor, + int32_t* mean_value); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_internal.h b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_internal.h new file mode 100644 index 0000000000..891e20027d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_internal.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Header file including the delay estimator handle used for testing. + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ + +#include "modules/audio_processing/utility/delay_estimator.h" + +namespace webrtc { + +typedef union { + float float_; + int32_t int32_; +} SpectrumType; + +typedef struct { + // Pointers to mean values of spectrum. + SpectrumType* mean_far_spectrum; + // `mean_far_spectrum` initialization indicator. + int far_spectrum_initialized; + + int spectrum_size; + + // Far-end part of binary spectrum based delay estimation. + BinaryDelayEstimatorFarend* binary_farend; +} DelayEstimatorFarend; + +typedef struct { + // Pointers to mean values of spectrum. + SpectrumType* mean_near_spectrum; + // `mean_near_spectrum` initialization indicator. + int near_spectrum_initialized; + + int spectrum_size; + + // Binary spectrum based delay estimator + BinaryDelayEstimator* binary_handle; +} DelayEstimator; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_unittest.cc new file mode 100644 index 0000000000..6052612ef3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_unittest.cc @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/utility/delay_estimator.h" + +#include "modules/audio_processing/utility/delay_estimator_internal.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +constexpr int kSpectrumSize = 65; +// Delay history sizes. +constexpr int kMaxDelay = 100; +constexpr int kLookahead = 10; +constexpr int kHistorySize = kMaxDelay + kLookahead; +// Length of binary spectrum sequence. 
+constexpr int kSequenceLength = 400;
+
+const int kDifferentHistorySize = 3;
+const int kDifferentLookahead = 1;
+
+const int kEnable[] = {0, 1};
+const size_t kSizeEnable = sizeof(kEnable) / sizeof(*kEnable);
+
+class DelayEstimatorTest : public ::testing::Test {
+ protected:
+  DelayEstimatorTest();
+  void SetUp() override;
+  void TearDown() override;
+
+  void Init();
+  void InitBinary();
+  void VerifyDelay(BinaryDelayEstimator* binary_handle, int offset, int delay);
+  void RunBinarySpectra(BinaryDelayEstimator* binary1,
+                        BinaryDelayEstimator* binary2,
+                        int near_offset,
+                        int lookahead_offset,
+                        int far_offset);
+  void RunBinarySpectraTest(int near_offset,
+                            int lookahead_offset,
+                            int ref_robust_validation,
+                            int robust_validation);
+
+  void* handle_;
+  DelayEstimator* self_;
+  void* farend_handle_;
+  DelayEstimatorFarend* farend_self_;
+  BinaryDelayEstimator* binary_;
+  BinaryDelayEstimatorFarend* binary_farend_;
+  int spectrum_size_;
+  // Dummy input spectra.
+  float far_f_[kSpectrumSize];
+  float near_f_[kSpectrumSize];
+  uint16_t far_u16_[kSpectrumSize];
+  uint16_t near_u16_[kSpectrumSize];
+  uint32_t binary_spectrum_[kSequenceLength + kHistorySize];
+};
+
+DelayEstimatorTest::DelayEstimatorTest()
+    : handle_(NULL),
+      self_(NULL),
+      farend_handle_(NULL),
+      farend_self_(NULL),
+      binary_(NULL),
+      binary_farend_(NULL),
+      spectrum_size_(kSpectrumSize) {
+  // Dummy input data are set with more or less arbitrary non-zero values.
+  memset(far_f_, 1, sizeof(far_f_));
+  memset(near_f_, 2, sizeof(near_f_));
+  memset(far_u16_, 1, sizeof(far_u16_));
+  memset(near_u16_, 2, sizeof(near_u16_));
+  // Construct a sequence of binary spectra used to verify delay estimate. The
+  // `kSequenceLength` has to be long enough for the delay estimation to leave
+  // the initialized state.
+  binary_spectrum_[0] = 1;
+  for (int i = 1; i < (kSequenceLength + kHistorySize); i++) {
+    binary_spectrum_[i] = 3 * binary_spectrum_[i - 1];
+  }
+}
+
+void DelayEstimatorTest::SetUp() {
+  farend_handle_ =
+      WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, kHistorySize);
+  ASSERT_TRUE(farend_handle_ != NULL);
+  farend_self_ = reinterpret_cast<DelayEstimatorFarend*>(farend_handle_);
+  handle_ = WebRtc_CreateDelayEstimator(farend_handle_, kLookahead);
+  ASSERT_TRUE(handle_ != NULL);
+  self_ = reinterpret_cast<DelayEstimator*>(handle_);
+  binary_farend_ = WebRtc_CreateBinaryDelayEstimatorFarend(kHistorySize);
+  ASSERT_TRUE(binary_farend_ != NULL);
+  binary_ = WebRtc_CreateBinaryDelayEstimator(binary_farend_, kLookahead);
+  ASSERT_TRUE(binary_ != NULL);
+}
+
+void DelayEstimatorTest::TearDown() {
+  WebRtc_FreeDelayEstimator(handle_);
+  handle_ = NULL;
+  self_ = NULL;
+  WebRtc_FreeDelayEstimatorFarend(farend_handle_);
+  farend_handle_ = NULL;
+  farend_self_ = NULL;
+  WebRtc_FreeBinaryDelayEstimator(binary_);
+  binary_ = NULL;
+  WebRtc_FreeBinaryDelayEstimatorFarend(binary_farend_);
+  binary_farend_ = NULL;
+}
+
+void DelayEstimatorTest::Init() {
+  // Initialize the delay estimator.
+  EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_));
+  EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_));
+  // Verify initialization.
+  EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(0, self_->near_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_last_delay(handle_));  // Delay in initial state.
+  EXPECT_FLOAT_EQ(0, WebRtc_last_delay_quality(handle_));  // Zero quality.
+}
+
+void DelayEstimatorTest::InitBinary() {
+  // Initialize the binary delay estimator (far-end part).
+  WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+  // Initialize the binary delay estimator.
+  WebRtc_InitBinaryDelayEstimator(binary_);
+  // Verify initialization. This does not guarantee a complete check, since
+  // `last_delay` may be equal to -2 before initialization if done on the fly.
+  EXPECT_EQ(-2, binary_->last_delay);
+}
+
+void DelayEstimatorTest::VerifyDelay(BinaryDelayEstimator* binary_handle,
+                                     int offset,
+                                     int delay) {
+  // Verify that WebRtc_binary_last_delay() returns the correct delay.
+  EXPECT_EQ(delay, WebRtc_binary_last_delay(binary_handle));
+
+  if (delay != -2) {
+    // Verify correct delay estimate. In the non-causal case the true delay
+    // is equivalent to the `offset`.
+    EXPECT_EQ(offset, delay);
+  }
+}
+
+void DelayEstimatorTest::RunBinarySpectra(BinaryDelayEstimator* binary1,
+                                          BinaryDelayEstimator* binary2,
+                                          int near_offset,
+                                          int lookahead_offset,
+                                          int far_offset) {
+  int different_validations =
+      binary1->robust_validation_enabled ^ binary2->robust_validation_enabled;
+  WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+  WebRtc_InitBinaryDelayEstimator(binary1);
+  WebRtc_InitBinaryDelayEstimator(binary2);
+  // Verify initialization. This does not guarantee a complete check, since
+  // `last_delay` may be equal to -2 before initialization if done on the fly.
+  EXPECT_EQ(-2, binary1->last_delay);
+  EXPECT_EQ(-2, binary2->last_delay);
+  for (int i = kLookahead; i < (kSequenceLength + kLookahead); i++) {
+    WebRtc_AddBinaryFarSpectrum(binary_farend_,
+                                binary_spectrum_[i + far_offset]);
+    int delay_1 = WebRtc_ProcessBinarySpectrum(binary1, binary_spectrum_[i]);
+    int delay_2 = WebRtc_ProcessBinarySpectrum(
+        binary2, binary_spectrum_[i - near_offset]);
+
+    VerifyDelay(binary1, far_offset + kLookahead, delay_1);
+    VerifyDelay(binary2,
+                far_offset + kLookahead + lookahead_offset + near_offset,
+                delay_2);
+    // Expect the two delay estimates to be offset by `lookahead_offset` +
+    // `near_offset` when we have left the initial state.
+    if ((delay_1 != -2) && (delay_2 != -2)) {
+      EXPECT_EQ(delay_1, delay_2 - lookahead_offset - near_offset);
+    }
+    // For the case of identical signals `delay_1` and `delay_2` should match
+    // all the time, unless one of them has robust validation turned on. In
+    // that case the robust validation leaves the initial state faster.
+    if ((near_offset == 0) && (lookahead_offset == 0)) {
+      if (!different_validations) {
+        EXPECT_EQ(delay_1, delay_2);
+      } else {
+        if (binary1->robust_validation_enabled) {
+          EXPECT_GE(delay_1, delay_2);
+        } else {
+          EXPECT_GE(delay_2, delay_1);
+        }
+      }
+    }
+  }
+  // Verify that we have left the initialized state.
+  EXPECT_NE(-2, WebRtc_binary_last_delay(binary1));
+  EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary1));
+  EXPECT_NE(-2, WebRtc_binary_last_delay(binary2));
+  EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary2));
+}
+
+void DelayEstimatorTest::RunBinarySpectraTest(int near_offset,
+                                              int lookahead_offset,
+                                              int ref_robust_validation,
+                                              int robust_validation) {
+  BinaryDelayEstimator* binary2 = WebRtc_CreateBinaryDelayEstimator(
+      binary_farend_, kLookahead + lookahead_offset);
+  // Verify the delay for both causal and non-causal systems. For causal
+  // systems the delay is equivalent to a positive `offset` of the far-end
+  // sequence. For non-causal systems the delay is equivalent to a negative
+  // `offset` of the far-end sequence.
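+  // The loop below sweeps `offset` from -kLookahead (maximally non-causal)
+  // up to just below kMaxDelay, adjusted for the extra lookahead and
+  // near-end offsets.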
+  binary_->robust_validation_enabled = ref_robust_validation;
+  binary2->robust_validation_enabled = robust_validation;
+  for (int offset = -kLookahead;
+       offset < kMaxDelay - lookahead_offset - near_offset; offset++) {
+    RunBinarySpectra(binary_, binary2, near_offset, lookahead_offset, offset);
+  }
+  WebRtc_FreeBinaryDelayEstimator(binary2);
+  binary2 = NULL;
+  binary_->robust_validation_enabled = 0;  // Reset reference.
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfWrapper) {
+  // In this test we verify correct error returns on invalid API calls.
+
+  // WebRtc_CreateDelayEstimatorFarend() and WebRtc_CreateDelayEstimator()
+  // should return a NULL pointer on invalid input values.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  void* handle = farend_handle_;
+  handle = WebRtc_CreateDelayEstimatorFarend(33, kHistorySize);
+  EXPECT_TRUE(handle == NULL);
+  handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, 1);
+  EXPECT_TRUE(handle == NULL);
+
+  handle = handle_;
+  handle = WebRtc_CreateDelayEstimator(NULL, kLookahead);
+  EXPECT_TRUE(handle == NULL);
+  handle = WebRtc_CreateDelayEstimator(farend_handle_, -1);
+  EXPECT_TRUE(handle == NULL);
+
+  // WebRtc_InitDelayEstimatorFarend() and WebRtc_InitDelayEstimator() should
+  // return -1 if we have a NULL pointer as `handle`.
+  EXPECT_EQ(-1, WebRtc_InitDelayEstimatorFarend(NULL));
+  EXPECT_EQ(-1, WebRtc_InitDelayEstimator(NULL));
+
+  // WebRtc_AddFarSpectrumFloat() should return -1 if we have:
+  // 1) NULL pointer as `handle`.
+  // 2) NULL pointer as far-end spectrum.
+  // 3) Incorrect spectrum size.
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(NULL, far_f_, spectrum_size_));
+  // Use `farend_handle_` which is properly created at SetUp().
+  EXPECT_EQ(-1,
+            WebRtc_AddFarSpectrumFloat(farend_handle_, NULL, spectrum_size_));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+                                           spectrum_size_ + 1));
+
+  // WebRtc_AddFarSpectrumFix() should return -1 if we have:
+  // 1) NULL pointer as `handle`.
+  // 2) NULL pointer as far-end spectrum.
+  // 3) Incorrect spectrum size.
+  // 4) Too high precision in far-end spectrum (Q-domain > 15).
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(NULL, far_u16_, spectrum_size_, 0));
+  EXPECT_EQ(-1,
+            WebRtc_AddFarSpectrumFix(farend_handle_, NULL, spectrum_size_, 0));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+                                         spectrum_size_ + 1, 0));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+                                         spectrum_size_, 16));
+
+  // WebRtc_set_history_size() should return -1 if:
+  // 1) `handle` is NULL.
+  // 2) `history_size` <= 1.
+  EXPECT_EQ(-1, WebRtc_set_history_size(NULL, 1));
+  EXPECT_EQ(-1, WebRtc_set_history_size(handle_, 1));
+  // WebRtc_history_size() should return -1 if:
+  // 1) NULL pointer input.
+  EXPECT_EQ(-1, WebRtc_history_size(NULL));
+  // 2) there is a mismatch between the history sizes.
+  void* tmp_handle = WebRtc_CreateDelayEstimator(farend_handle_, kHistorySize);
+  EXPECT_EQ(0, WebRtc_InitDelayEstimator(tmp_handle));
+  EXPECT_EQ(kDifferentHistorySize,
+            WebRtc_set_history_size(tmp_handle, kDifferentHistorySize));
+  EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(tmp_handle));
+  EXPECT_EQ(kHistorySize, WebRtc_set_history_size(handle_, kHistorySize));
+  EXPECT_EQ(-1, WebRtc_history_size(tmp_handle));
+
+  // WebRtc_set_lookahead() should return -1 if we try a value outside the
+  // buffer.
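+  // Valid lookahead values lie in [0, kLookahead], the maximum given at
+  // create time, so both kLookahead + 1 and -1 must be rejected.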
+  EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, kLookahead + 1));
+  EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, -1));
+
+  // WebRtc_set_allowed_offset() should return -1 if we have:
+  // 1) NULL pointer as `handle`.
+  // 2) `allowed_offset` < 0.
+  EXPECT_EQ(-1, WebRtc_set_allowed_offset(NULL, 0));
+  EXPECT_EQ(-1, WebRtc_set_allowed_offset(handle_, -1));
+
+  EXPECT_EQ(-1, WebRtc_get_allowed_offset(NULL));
+
+  // WebRtc_enable_robust_validation() should return -1 if we have:
+  // 1) NULL pointer as `handle`.
+  // 2) Incorrect `enable` value (not 0 or 1).
+  EXPECT_EQ(-1, WebRtc_enable_robust_validation(NULL, kEnable[0]));
+  EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, -1));
+  EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, 2));
+
+  // WebRtc_is_robust_validation_enabled() should return -1 if we have a NULL
+  // pointer as `handle`.
+  EXPECT_EQ(-1, WebRtc_is_robust_validation_enabled(NULL));
+
+  // WebRtc_DelayEstimatorProcessFloat() should return -1 if we have:
+  // 1) NULL pointer as `handle`.
+  // 2) NULL pointer as near-end spectrum.
+  // 3) Incorrect spectrum size.
+  // 4) Non-matching history sizes if multiple delay estimators use the same
+  //    far-end reference.
+  EXPECT_EQ(-1,
+            WebRtc_DelayEstimatorProcessFloat(NULL, near_f_, spectrum_size_));
+  // Use `handle_` which is properly created at SetUp().
+  EXPECT_EQ(-1,
+            WebRtc_DelayEstimatorProcessFloat(handle_, NULL, spectrum_size_));
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_,
+                                                  spectrum_size_ + 1));
+  // `tmp_handle` is already in a non-matching state.
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(tmp_handle, near_f_,
+                                                  spectrum_size_));
+
+  // WebRtc_DelayEstimatorProcessFix() should return -1 if we have:
+  // 1) NULL pointer as `handle`.
+  // 2) NULL pointer as near-end spectrum.
+  // 3) Incorrect spectrum size.
+  // 4) Too high precision in near-end spectrum (Q-domain > 15).
+  // 5) Non-matching history sizes if multiple delay estimators use the same
+  //    far-end reference.
+  EXPECT_EQ(
+      -1, WebRtc_DelayEstimatorProcessFix(NULL, near_u16_, spectrum_size_, 0));
+  EXPECT_EQ(-1,
+            WebRtc_DelayEstimatorProcessFix(handle_, NULL, spectrum_size_, 0));
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+                                                spectrum_size_ + 1, 0));
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+                                                spectrum_size_, 16));
+  // `tmp_handle` is already in a non-matching state.
+  EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(tmp_handle, near_u16_,
+                                                spectrum_size_, 0));
+  WebRtc_FreeDelayEstimator(tmp_handle);
+
+  // WebRtc_last_delay() should return -1 if we have a NULL pointer as
+  // `handle`.
+  EXPECT_EQ(-1, WebRtc_last_delay(NULL));
+
+  // Free any local memory if needed.
+  WebRtc_FreeDelayEstimator(handle);
+}
+
+TEST_F(DelayEstimatorTest, VerifyAllowedOffset) {
+  // Is set to zero by default.
+  EXPECT_EQ(0, WebRtc_get_allowed_offset(handle_));
+  for (int i = 1; i >= 0; i--) {
+    EXPECT_EQ(0, WebRtc_set_allowed_offset(handle_, i));
+    EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_));
+    Init();
+    // Unaffected over a reset.
+    EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_));
+  }
+}
+
+TEST_F(DelayEstimatorTest, VerifyEnableRobustValidation) {
+  // Disabled by default.
+  EXPECT_EQ(0, WebRtc_is_robust_validation_enabled(handle_));
+  for (size_t i = 0; i < kSizeEnable; ++i) {
+    EXPECT_EQ(0, WebRtc_enable_robust_validation(handle_, kEnable[i]));
+    EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_));
+    Init();
+    // Unaffected over a reset.
+    EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_));
+  }
+}
+
+TEST_F(DelayEstimatorTest, InitializedSpectrumAfterProcess) {
+  // In this test we verify that the mean spectra are initialized after the
+  // first time we call WebRtc_AddFarSpectrum() and Process() respectively.
+  // The test also verifies that all-zero spectra do not set the
+  // initialization flags.
+  const float kZerosFloat[kSpectrumSize] = {0.0};
+  const uint16_t kZerosU16[kSpectrumSize] = {0};
+
+  // For floating point operations, process one frame and verify the
+  // initialization flag.
+  Init();
+  EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, kZerosFloat,
+                                          spectrum_size_));
+  EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(0,
+            WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, spectrum_size_));
+  EXPECT_EQ(1, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, kZerosFloat,
+                                                  spectrum_size_));
+  EXPECT_EQ(0, self_->near_spectrum_initialized);
+  EXPECT_EQ(
+      -2, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, spectrum_size_));
+  EXPECT_EQ(1, self_->near_spectrum_initialized);
+
+  // For fixed point operations, process one frame and verify the
+  // initialization flag.
+  Init();
+  EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, kZerosU16,
+                                        spectrum_size_, 0));
+  EXPECT_EQ(0, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(
+      0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, spectrum_size_, 0));
+  EXPECT_EQ(1, farend_self_->far_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, kZerosU16,
+                                                spectrum_size_, 0));
+  EXPECT_EQ(0, self_->near_spectrum_initialized);
+  EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_,
+                                                spectrum_size_, 0));
+  EXPECT_EQ(1, self_->near_spectrum_initialized);
+}
+
+TEST_F(DelayEstimatorTest, CorrectLastDelay) {
+  // In this test we verify that we get the correct last delay upon a valid
+  // call. We simply process the same data until we leave the initialized
+  // state (`last_delay` = -2). Then we compare the Process() output with the
+  // last_delay() call.
+
+  // TODO(bjornv): Update quality values for robust validation.
+  int last_delay = 0;
+  // Floating point operations.
+  Init();
+  for (int i = 0; i < 200; i++) {
+    EXPECT_EQ(
+        0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, spectrum_size_));
+    last_delay =
+        WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, spectrum_size_);
+    if (last_delay != -2) {
+      EXPECT_EQ(last_delay, WebRtc_last_delay(handle_));
+      if (!WebRtc_is_robust_validation_enabled(handle_)) {
+        EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9,
+                        WebRtc_last_delay_quality(handle_));
+      }
+      break;
+    }
+  }
+  // Verify that we have left the initialized state.
+  EXPECT_NE(-2, WebRtc_last_delay(handle_));
+  EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+
+  // Fixed point operations.
+  Init();
+  for (int i = 0; i < 200; i++) {
+    EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+                                          spectrum_size_, 0));
+    last_delay =
+        WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, spectrum_size_, 0);
+    if (last_delay != -2) {
+      EXPECT_EQ(last_delay, WebRtc_last_delay(handle_));
+      if (!WebRtc_is_robust_validation_enabled(handle_)) {
+        EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9,
+                        WebRtc_last_delay_quality(handle_));
+      }
+      break;
+    }
+  }
+  // Verify that we have left the initialized state.
+  EXPECT_NE(-2, WebRtc_last_delay(handle_));
+  EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimatorFarend) {
+  // In this test we verify correct output on invalid API calls to the Binary
+  // Delay Estimator (far-end part).
+
+  BinaryDelayEstimatorFarend* binary = binary_farend_;
+  // WebRtc_CreateBinaryDelayEstimatorFarend() should return NULL if the input
+  // history size is less than 2. This is to make sure the buffer shifting
+  // applies properly.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  binary = WebRtc_CreateBinaryDelayEstimatorFarend(1);
+  EXPECT_TRUE(binary == NULL);
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimator) {
+  // In this test we verify correct output on invalid API calls to the Binary
+  // Delay Estimator.
+
+  BinaryDelayEstimator* binary_handle = binary_;
+  // WebRtc_CreateBinaryDelayEstimator() should return NULL if we have a NULL
+  // pointer as `binary_farend` or invalid input values. Upon failure, the
+  // `binary_handle` should be NULL.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  binary_handle = WebRtc_CreateBinaryDelayEstimator(NULL, kLookahead);
+  EXPECT_TRUE(binary_handle == NULL);
+  binary_handle = WebRtc_CreateBinaryDelayEstimator(binary_farend_, -1);
+  EXPECT_TRUE(binary_handle == NULL);
+}
+
+TEST_F(DelayEstimatorTest, MeanEstimatorFix) {
+  // In this test we verify that we update the mean value in the correct
+  // direction only. With "direction" we mean increase or decrease.
+
+  int32_t mean_value = 4000;
+  int32_t mean_value_before = mean_value;
+  int32_t new_mean_value = mean_value * 2;
+
+  // Increasing `mean_value`.
+  WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+  EXPECT_LT(mean_value_before, mean_value);
+  EXPECT_GT(new_mean_value, mean_value);
+
+  // Decreasing `mean_value`.
+  new_mean_value = mean_value / 2;
+  mean_value_before = mean_value;
+  WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+  EXPECT_GT(mean_value_before, mean_value);
+  EXPECT_LT(new_mean_value, mean_value);
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearSameSpectrum) {
+  // In this test we verify that we get the correct delay estimates if we
+  // shift the signal accordingly. We create two Binary Delay Estimators and
+  // feed them with the same signals, so they should output the same results.
+  // We verify both causal and non-causal delays.
+  // For these noise free signals, the robust validation should not have an
+  // impact, hence we turn robust validation on/off for both reference and
+  // delayed near end.
+
+  for (size_t i = 0; i < kSizeEnable; ++i) {
+    for (size_t j = 0; j < kSizeEnable; ++j) {
+      RunBinarySpectraTest(0, 0, kEnable[i], kEnable[j]);
+    }
+  }
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentSpectrum) {
+  // In this test we use the same setup as above, but we now feed the two
+  // Binary Delay Estimators with different signals, so they should output
+  // different results.
+  // For these noise free signals, the robust validation should not have an
+  // impact, hence we turn robust validation on/off for both reference and
+  // delayed near end.
+ + const int kNearOffset = 1; + for (size_t i = 0; i < kSizeEnable; ++i) { + for (size_t j = 0; j < kSizeEnable; ++j) { + RunBinarySpectraTest(kNearOffset, 0, kEnable[i], kEnable[j]); + } + } +} + +TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentLookahead) { + // In this test we use the same setup as above, feeding the two Binary + // Delay Estimators with the same signals. The difference is that we create + // them with different lookahead. + // For these noise free signals, the robust validation should not have an + // impact, hence we turn robust validation on/off for both reference and + // delayed near end. + + const int kLookaheadOffset = 1; + for (size_t i = 0; i < kSizeEnable; ++i) { + for (size_t j = 0; j < kSizeEnable; ++j) { + RunBinarySpectraTest(0, kLookaheadOffset, kEnable[i], kEnable[j]); + } + } +} + +TEST_F(DelayEstimatorTest, AllowedOffsetNoImpactWhenRobustValidationDisabled) { + // The same setup as in ExactDelayEstimateMultipleNearSameSpectrum with the + // difference that `allowed_offset` is set for the reference binary delay + // estimator. + + binary_->allowed_offset = 10; + RunBinarySpectraTest(0, 0, 0, 0); + binary_->allowed_offset = 0; // Reset reference. +} + +TEST_F(DelayEstimatorTest, VerifyLookaheadAtCreate) { + void* farend_handle = + WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, kMaxDelay); + ASSERT_TRUE(farend_handle != NULL); + void* handle = WebRtc_CreateDelayEstimator(farend_handle, kLookahead); + ASSERT_TRUE(handle != NULL); + EXPECT_EQ(kLookahead, WebRtc_lookahead(handle)); + WebRtc_FreeDelayEstimator(handle); + WebRtc_FreeDelayEstimatorFarend(farend_handle); +} + +TEST_F(DelayEstimatorTest, VerifyLookaheadIsSetAndKeptAfterInit) { + EXPECT_EQ(kLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(kDifferentLookahead, + WebRtc_set_lookahead(handle_, kDifferentLookahead)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); +} + +TEST_F(DelayEstimatorTest, VerifyHistorySizeAtCreate) { + EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_)); +} + +TEST_F(DelayEstimatorTest, VerifyHistorySizeIsSetAndKeptAfterInit) { + EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(kDifferentHistorySize, + WebRtc_set_history_size(handle_, kDifferentHistorySize)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); +} + +// TODO(bjornv): Add tests for SoftReset...(...). + +} // namespace + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc new file mode 100644 index 0000000000..3b1409cc0b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "modules/audio_processing/utility/delay_estimator.h"
+#include "modules/audio_processing/utility/delay_estimator_internal.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+// Only bit `kBandFirst` through bit `kBandLast` are processed and
+// `kBandLast` - `kBandFirst` must be < 32.
+constexpr int kBandFirst = 12;
+constexpr int kBandLast = 43;
+
+static __inline uint32_t SetBit(uint32_t in, int pos) {
+  uint32_t mask = (1 << pos);
+  uint32_t out = (in | mask);
+
+  return out;
+}
+
+// Calculates the mean recursively. Same version as WebRtc_MeanEstimatorFix(),
+// but for float.
+//
+// Inputs:
+//    - new_value             : New additional value.
+//    - scale                 : Scale for smoothing (should be less than 1.0).
+//
+// Input/Output:
+//    - mean_value            : Pointer to the mean value for updating.
+//
+static void MeanEstimatorFloat(float new_value,
+                               float scale,
+                               float* mean_value) {
+  RTC_DCHECK_LT(scale, 1.0f);
+  *mean_value += (new_value - *mean_value) * scale;
+}
+
+// Computes the binary spectrum by comparing the input `spectrum` with a
+// `threshold_spectrum`. Float and fixed point versions.
+//
+// Inputs:
+//    - spectrum              : Spectrum of which the binary spectrum should
+//                              be calculated.
+//    - threshold_spectrum    : Threshold spectrum with which the input
+//                              spectrum is compared.
+// Return:
+//    - out                   : Binary spectrum.
+//
+static uint32_t BinarySpectrumFix(const uint16_t* spectrum,
+                                  SpectrumType* threshold_spectrum,
+                                  int q_domain,
+                                  int* threshold_initialized) {
+  int i = kBandFirst;
+  uint32_t out = 0;
+
+  RTC_DCHECK_LT(q_domain, 16);
+
+  if (!(*threshold_initialized)) {
+    // Set the `threshold_spectrum` to half the input `spectrum` as starting
+    // value. This speeds up the convergence.
+    for (i = kBandFirst; i <= kBandLast; i++) {
+      if (spectrum[i] > 0) {
+        // Convert input spectrum from Q(`q_domain`) to Q15.
+        int32_t spectrum_q15 = ((int32_t)spectrum[i]) << (15 - q_domain);
+        threshold_spectrum[i].int32_ = (spectrum_q15 >> 1);
+        *threshold_initialized = 1;
+      }
+    }
+  }
+  for (i = kBandFirst; i <= kBandLast; i++) {
+    // Convert input spectrum from Q(`q_domain`) to Q15.
+    int32_t spectrum_q15 = ((int32_t)spectrum[i]) << (15 - q_domain);
+    // Update the `threshold_spectrum`.
+    WebRtc_MeanEstimatorFix(spectrum_q15, 6, &(threshold_spectrum[i].int32_));
+    // Convert `spectrum` at the current frequency bin to a binary value.
+    if (spectrum_q15 > threshold_spectrum[i].int32_) {
+      out = SetBit(out, i - kBandFirst);
+    }
+  }
+
+  return out;
+}
+
+static uint32_t BinarySpectrumFloat(const float* spectrum,
+                                    SpectrumType* threshold_spectrum,
+                                    int* threshold_initialized) {
+  int i = kBandFirst;
+  uint32_t out = 0;
+  const float kScale = 1 / 64.0;
+
+  if (!(*threshold_initialized)) {
+    // Set the `threshold_spectrum` to half the input `spectrum` as starting
+    // value. This speeds up the convergence.
+    for (i = kBandFirst; i <= kBandLast; i++) {
+      if (spectrum[i] > 0.0f) {
+        threshold_spectrum[i].float_ = (spectrum[i] / 2);
+        *threshold_initialized = 1;
+      }
+    }
+  }
+
+  for (i = kBandFirst; i <= kBandLast; i++) {
+    // Update the `threshold_spectrum`.
+    MeanEstimatorFloat(spectrum[i], kScale, &(threshold_spectrum[i].float_));
+    // Convert `spectrum` at the current frequency bin to a binary value.
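+    // A bit is set whenever the band's energy exceeds its running mean, so
+    // the resulting 32-bit word is a coarse signature of the spectral shape.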
+    if (spectrum[i] > threshold_spectrum[i].float_) {
+      out = SetBit(out, i - kBandFirst);
+    }
+  }
+
+  return out;
+}
+
+void WebRtc_FreeDelayEstimatorFarend(void* handle) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+
+  if (handle == NULL) {
+    return;
+  }
+
+  free(self->mean_far_spectrum);
+  self->mean_far_spectrum = NULL;
+
+  WebRtc_FreeBinaryDelayEstimatorFarend(self->binary_farend);
+  self->binary_farend = NULL;
+
+  free(self);
+}
+
+void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size) {
+  DelayEstimatorFarend* self = NULL;
+
+  // Check if the sub band used in the delay estimation is small enough to fit
+  // the binary spectra in a uint32_t.
+  static_assert(kBandLast - kBandFirst < 32, "");
+
+  if (spectrum_size >= kBandLast) {
+    self = static_cast<DelayEstimatorFarend*>(
+        malloc(sizeof(DelayEstimatorFarend)));
+  }
+
+  if (self != NULL) {
+    int memory_fail = 0;
+
+    // Allocate memory for the binary far-end spectrum handling.
+    self->binary_farend = WebRtc_CreateBinaryDelayEstimatorFarend(history_size);
+    memory_fail |= (self->binary_farend == NULL);
+
+    // Allocate memory for spectrum buffers.
+    self->mean_far_spectrum = static_cast<SpectrumType*>(
+        malloc(spectrum_size * sizeof(SpectrumType)));
+    memory_fail |= (self->mean_far_spectrum == NULL);
+
+    self->spectrum_size = spectrum_size;
+
+    if (memory_fail) {
+      WebRtc_FreeDelayEstimatorFarend(self);
+      self = NULL;
+    }
+  }
+
+  return self;
+}
+
+int WebRtc_InitDelayEstimatorFarend(void* handle) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+
+  if (self == NULL) {
+    return -1;
+  }
+
+  // Initialize far-end part of binary delay estimator.
+  WebRtc_InitBinaryDelayEstimatorFarend(self->binary_farend);
+
+  // Set the averaged far-end spectrum to zero.
+  memset(self->mean_far_spectrum, 0,
+         sizeof(SpectrumType) * self->spectrum_size);
+  // Reset initialization indicators.
+  self->far_spectrum_initialized = 0;
+
+  return 0;
+}
+
+void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+  RTC_DCHECK(self);
+  WebRtc_SoftResetBinaryDelayEstimatorFarend(self->binary_farend, delay_shift);
+}
+
+int WebRtc_AddFarSpectrumFix(void* handle,
+                             const uint16_t* far_spectrum,
+                             int spectrum_size,
+                             int far_q) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+  uint32_t binary_spectrum = 0;
+
+  if (self == NULL) {
+    return -1;
+  }
+  if (far_spectrum == NULL) {
+    // Empty far end spectrum.
+    return -1;
+  }
+  if (spectrum_size != self->spectrum_size) {
+    // Data sizes don't match.
+    return -1;
+  }
+  if (far_q > 15) {
+    // If `far_q` is larger than 15 we cannot guarantee no wrap around.
+    return -1;
+  }
+
+  // Get binary spectrum.
+  binary_spectrum = BinarySpectrumFix(far_spectrum, self->mean_far_spectrum,
+                                      far_q, &(self->far_spectrum_initialized));
+  WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum);
+
+  return 0;
+}
+
+int WebRtc_AddFarSpectrumFloat(void* handle,
+                               const float* far_spectrum,
+                               int spectrum_size) {
+  DelayEstimatorFarend* self = (DelayEstimatorFarend*)handle;
+  uint32_t binary_spectrum = 0;
+
+  if (self == NULL) {
+    return -1;
+  }
+  if (far_spectrum == NULL) {
+    // Empty far end spectrum.
+    return -1;
+  }
+  if (spectrum_size != self->spectrum_size) {
+    // Data sizes don't match.
+    return -1;
+  }
+
+  // Get binary spectrum.
+ binary_spectrum = BinarySpectrumFloat(far_spectrum, self->mean_far_spectrum, + &(self->far_spectrum_initialized)); + WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum); + + return 0; +} + +void WebRtc_FreeDelayEstimator(void* handle) { + DelayEstimator* self = (DelayEstimator*)handle; + + if (handle == NULL) { + return; + } + + free(self->mean_near_spectrum); + self->mean_near_spectrum = NULL; + + WebRtc_FreeBinaryDelayEstimator(self->binary_handle); + self->binary_handle = NULL; + + free(self); +} + +void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead) { + DelayEstimator* self = NULL; + DelayEstimatorFarend* farend = (DelayEstimatorFarend*)farend_handle; + + if (farend_handle != NULL) { + self = static_cast<DelayEstimator*>(malloc(sizeof(DelayEstimator))); + } + + if (self != NULL) { + int memory_fail = 0; + + // Allocate memory for the farend spectrum handling. + self->binary_handle = + WebRtc_CreateBinaryDelayEstimator(farend->binary_farend, max_lookahead); + memory_fail |= (self->binary_handle == NULL); + + // Allocate memory for spectrum buffers. + self->mean_near_spectrum = static_cast<SpectrumType*>( + malloc(farend->spectrum_size * sizeof(SpectrumType))); + memory_fail |= (self->mean_near_spectrum == NULL); + + self->spectrum_size = farend->spectrum_size; + + if (memory_fail) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + } + } + + return self; +} + +int WebRtc_InitDelayEstimator(void* handle) { + DelayEstimator* self = (DelayEstimator*)handle; + + if (self == NULL) { + return -1; + } + + // Initialize binary delay estimator. + WebRtc_InitBinaryDelayEstimator(self->binary_handle); + + // Set averaged far and near end spectra to zero. + memset(self->mean_near_spectrum, 0, + sizeof(SpectrumType) * self->spectrum_size); + // Reset initialization indicators. + self->near_spectrum_initialized = 0; + + return 0; +} + +int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift) { + DelayEstimator* self = (DelayEstimator*)handle; + RTC_DCHECK(self); + return WebRtc_SoftResetBinaryDelayEstimator(self->binary_handle, delay_shift); +} + +int WebRtc_set_history_size(void* handle, int history_size) { + DelayEstimator* self = static_cast<DelayEstimator*>(handle); + + if ((self == NULL) || (history_size <= 1)) { + return -1; + } + return WebRtc_AllocateHistoryBufferMemory(self->binary_handle, history_size); +} + +int WebRtc_history_size(const void* handle) { + const DelayEstimator* self = static_cast<const DelayEstimator*>(handle); + + if (self == NULL) { + return -1; + } + if (self->binary_handle->farend->history_size != + self->binary_handle->history_size) { + // Non matching history sizes.
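+ // This can happen if WebRtc_set_history_size() was not applied to every estimator sharing this far-end instance (see the header note).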
+ return -1; + } + return self->binary_handle->history_size; +} + +int WebRtc_set_lookahead(void* handle, int lookahead) { + DelayEstimator* self = (DelayEstimator*)handle; + RTC_DCHECK(self); + RTC_DCHECK(self->binary_handle); + if ((lookahead > self->binary_handle->near_history_size - 1) || + (lookahead < 0)) { + return -1; + } + self->binary_handle->lookahead = lookahead; + return self->binary_handle->lookahead; +} + +int WebRtc_lookahead(void* handle) { + DelayEstimator* self = (DelayEstimator*)handle; + RTC_DCHECK(self); + RTC_DCHECK(self->binary_handle); + return self->binary_handle->lookahead; +} + +int WebRtc_set_allowed_offset(void* handle, int allowed_offset) { + DelayEstimator* self = (DelayEstimator*)handle; + + if ((self == NULL) || (allowed_offset < 0)) { + return -1; + } + self->binary_handle->allowed_offset = allowed_offset; + return 0; +} + +int WebRtc_get_allowed_offset(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*)handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->allowed_offset; +} + +int WebRtc_enable_robust_validation(void* handle, int enable) { + DelayEstimator* self = (DelayEstimator*)handle; + + if (self == NULL) { + return -1; + } + if ((enable < 0) || (enable > 1)) { + return -1; + } + RTC_DCHECK(self->binary_handle); + self->binary_handle->robust_validation_enabled = enable; + return 0; +} + +int WebRtc_is_robust_validation_enabled(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*)handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->robust_validation_enabled; +} + +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q) { + DelayEstimator* self = (DelayEstimator*)handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + if (near_q > 15) { + // If `near_q` is larger than 15 we cannot guarantee no wrap around. + return -1; + } + + // Get binary spectra. + binary_spectrum = + BinarySpectrumFix(near_spectrum, self->mean_near_spectrum, near_q, + &(self->near_spectrum_initialized)); + + return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum); +} + +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size) { + DelayEstimator* self = (DelayEstimator*)handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + + // Get binary spectrum. 
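+ // The near-end mask is then matched against the buffered far-end masks to estimate the delay.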
+ binary_spectrum = BinarySpectrumFloat(near_spectrum, self->mean_near_spectrum, + &(self->near_spectrum_initialized)); + + return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum); +} + +int WebRtc_last_delay(void* handle) { + DelayEstimator* self = (DelayEstimator*)handle; + + if (self == NULL) { + return -1; + } + + return WebRtc_binary_last_delay(self->binary_handle); +} + +float WebRtc_last_delay_quality(void* handle) { + DelayEstimator* self = (DelayEstimator*)handle; + RTC_DCHECK(self); + return WebRtc_binary_last_delay_quality(self->binary_handle); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.h b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.h new file mode 100644 index 0000000000..a90cbe31cb --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.h @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs delay estimation on a block-by-block basis. +// The return value is 0 - OK and -1 - Error, unless otherwise stated. + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ + +#include <stdint.h> + +namespace webrtc { + +// Releases the memory allocated by WebRtc_CreateDelayEstimatorFarend(...) +void WebRtc_FreeDelayEstimatorFarend(void* handle); + +// Allocates the memory needed by the far-end part of the delay estimation. The +// memory needs to be initialized separately through +// WebRtc_InitDelayEstimatorFarend(...). +// +// Inputs: +// - spectrum_size : Size of the spectrum used both in far-end and +// near-end. Used to allocate memory for spectrum +// specific buffers. +// - history_size : The far-end history buffer size. A change in buffer +// size can be forced with WebRtc_set_history_size(). +// Note that the maximum delay which can be estimated is +// determined together with WebRtc_set_lookahead(). +// +// Return value: +// - void* : Created `handle`. If the memory can't be allocated or +// if any of the input parameters are invalid NULL is +// returned. +void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size); + +// Initializes the far-end part of the delay estimation instance returned by +// WebRtc_CreateDelayEstimatorFarend(...) +int WebRtc_InitDelayEstimatorFarend(void* handle); + +// Soft resets the far-end part of the delay estimation instance returned by +// WebRtc_CreateDelayEstimatorFarend(...). +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift); + +// Adds the far-end spectrum to the far-end history buffer. This spectrum is +// used as reference when calculating the delay using +// WebRtc_DelayEstimatorProcessFix() or WebRtc_DelayEstimatorProcessFloat(). +// +// Inputs: +// - far_spectrum : Far-end spectrum. +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end). +// - far_q : The Q-domain of the far-end data. +// +// Output: +// - handle : Updated far-end instance.
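+// +// Return value: +// - 0 on success; -1 on error (NULL handle or spectrum, size mismatch, or +// `far_q` > 15).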
+// +int WebRtc_AddFarSpectrumFix(void* handle, + const uint16_t* far_spectrum, + int spectrum_size, + int far_q); + +// See WebRtc_AddFarSpectrumFix() for description. +int WebRtc_AddFarSpectrumFloat(void* handle, + const float* far_spectrum, + int spectrum_size); + +// Releases the memory allocated by WebRtc_CreateDelayEstimator(...) +void WebRtc_FreeDelayEstimator(void* handle); + +// Allocates the memory needed by the delay estimation. The memory needs to be +// initialized separately through WebRtc_InitDelayEstimator(...). +// +// Inputs: +// - farend_handle : Pointer to the far-end part of the delay estimation +// instance created prior to this call using +// WebRtc_CreateDelayEstimatorFarend(). +// +// Note that WebRtc_CreateDelayEstimator does not take +// ownership of `farend_handle`, which has to be torn +// down properly after this instance. +// +// - max_lookahead : Maximum amount of non-causal lookahead allowed. The +// actual amount of lookahead used can be controlled by +// WebRtc_set_lookahead(...). The default `lookahead` is +// set to `max_lookahead` at create time. Use +// WebRtc_set_lookahead(...) before start if a different +// value is desired. +// +// Using lookahead can detect cases in which a near-end +// signal occurs before the corresponding far-end signal. +// It will delay the estimate for the current block by an +// equal amount, and the returned values will be offset +// by it. +// +// A value of zero is the typical no-lookahead case. +// This also represents the minimum delay which can be +// estimated. +// +// Note that the effective range of delay estimates is +// [-`lookahead`,... ,`history_size`-`lookahead`) +// where `history_size` is set through +// WebRtc_set_history_size(). +// +// Return value: +// - void* : Created `handle`. If the memory can't be allocated or +// if any of the input parameters are invalid NULL is +// returned. +void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead); + +// Initializes the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +int WebRtc_InitDelayEstimator(void* handle); + +// Soft resets the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +// Return value: +// - actual_shifts : The actual number of shifts performed. +int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift); + +// Sets the effective `history_size` used. Valid values from 2. We simply need +// at least two delays to compare to perform an estimate. If `history_size` is +// changed, buffers are reallocated filling in with zeros if necessary. +// Note that changing the `history_size` affects both buffers in far-end and +// near-end. Hence it is important to change all DelayEstimators that use the +// same reference far-end, to the same `history_size` value. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - history_size : Effective history size to be used. +// Return value: +// - new_history_size : The new history size used. If the memory was not able +// to be allocated 0 is returned. +int WebRtc_set_history_size(void* handle, int history_size); + +// Returns the history_size currently used. +// Input: +// - handle : Pointer to the delay estimation instance. +int WebRtc_history_size(const void* handle); + +// Sets the amount of `lookahead` to use. 
Valid values are [0, max_lookahead] +// where `max_lookahead` was set at create time through +// WebRtc_CreateDelayEstimator(...). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// - lookahead : The amount of lookahead to be used. +// +// Return value: +// - new_lookahead : The actual amount of lookahead set, unless `handle` is +// a NULL pointer or `lookahead` is invalid, for which an +// error is returned. +int WebRtc_set_lookahead(void* handle, int lookahead); + +// Returns the amount of lookahead we currently use. +// Input: +// - handle : Pointer to the delay estimation instance. +int WebRtc_lookahead(void* handle); + +// Sets the `allowed_offset` used in the robust validation scheme. If the +// delay estimator is used in an echo control component, this parameter is +// related to the filter length. In principle `allowed_offset` should be set to +// the echo control filter length minus the expected echo duration, i.e., the +// delay offset the echo control can handle without quality regression. The +// default value, used if not set manually, is zero. Note that `allowed_offset` +// has to be non-negative. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - allowed_offset : The amount of delay offset, measured in partitions, +// the echo control filter can handle. +int WebRtc_set_allowed_offset(void* handle, int allowed_offset); + +// Returns the `allowed_offset` in number of partitions. +int WebRtc_get_allowed_offset(const void* handle); + +// Enables/Disables a robust validation functionality in the delay estimation. +// This is by default set to disabled at create time. The state is preserved +// over a reset. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - enable : Enable (1) or disable (0) this feature. +int WebRtc_enable_robust_validation(void* handle, int enable); + +// Returns 1 if robust validation is enabled and 0 if disabled. +int WebRtc_is_robust_validation_enabled(const void* handle); + +// Estimates and returns the delay between the far-end and near-end blocks. The +// value will be offset by the lookahead (i.e. the lookahead should be +// subtracted from the returned value). +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - near_spectrum : Pointer to the near-end spectrum data of the current +// block. +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end). +// - near_q : The Q-domain of the near-end data. +// +// Output: +// - handle : Updated instance. +// +// Return value: +// - delay : >= 0 - Calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q); + +// See WebRtc_DelayEstimatorProcessFix() for description. +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size); + +// Returns the last calculated delay updated by the function +// WebRtc_DelayEstimatorProcess(...). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// +// Return value: +// - delay : >= 0 - Last calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_last_delay(void* handle); + +// Returns the estimation quality/probability of the last calculated delay +// updated by the function WebRtc_DelayEstimatorProcess(...). The estimation +// quality is a value in the interval [0, 1]. 
The higher the value, the better +// the quality. +// +// Return value: +// - delay_quality : >= 0 - Estimation quality of last calculated delay. +float WebRtc_last_delay_quality(void* handle); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/utility/legacy_delay_estimator_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/utility/legacy_delay_estimator_gn/moz.build new file mode 100644 index 0000000000..11294e2aef --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/legacy_delay_estimator_gn/moz.build @@ -0,0 +1,222 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + 
DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("legacy_delay_estimator_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc new file mode 100644 index 0000000000..88642fb12b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/utility/pffft_wrapper.h" + +#include "rtc_base/checks.h" +#include "third_party/pffft/src/pffft.h" + +namespace webrtc { +namespace { + +size_t GetBufferSize(size_t fft_size, Pffft::FftType fft_type) { + return fft_size * (fft_type == Pffft::FftType::kReal ? 1 : 2); +} + +float* AllocatePffftBuffer(size_t size) { + return static_cast<float*>(pffft_aligned_malloc(size * sizeof(float))); +} + +} // namespace + +Pffft::FloatBuffer::FloatBuffer(size_t fft_size, FftType fft_type) + : size_(GetBufferSize(fft_size, fft_type)), + data_(AllocatePffftBuffer(size_)) {} + +Pffft::FloatBuffer::~FloatBuffer() { + pffft_aligned_free(data_); +} + +rtc::ArrayView<const float> Pffft::FloatBuffer::GetConstView() const { + return {data_, size_}; +} + +rtc::ArrayView<float> Pffft::FloatBuffer::GetView() { + return {data_, size_}; +} + +Pffft::Pffft(size_t fft_size, FftType fft_type) + : fft_size_(fft_size), + fft_type_(fft_type), + pffft_status_(pffft_new_setup( + fft_size_, + fft_type == Pffft::FftType::kReal ? PFFFT_REAL : PFFFT_COMPLEX)), + scratch_buffer_( + AllocatePffftBuffer(GetBufferSize(fft_size_, fft_type_))) { + RTC_DCHECK(pffft_status_); + RTC_DCHECK(scratch_buffer_); +} + +Pffft::~Pffft() { + pffft_destroy_setup(pffft_status_); + pffft_aligned_free(scratch_buffer_); +} + +bool Pffft::IsValidFftSize(size_t fft_size, FftType fft_type) { + if (fft_size == 0) { + return false; + } + // PFFFT only supports transforms for inputs of length N of the form + // N = (2^a)*(3^b)*(5^c) where b >= 0 and c >= 0 and a >= 5 for the real FFT + // and a >= 4 for the complex FFT. + constexpr int kFactors[] = {2, 3, 5}; + int factorization[] = {0, 0, 0}; + int n = static_cast<int>(fft_size); + for (int i = 0; i < 3; ++i) { + while (n % kFactors[i] == 0) { + n = n / kFactors[i]; + factorization[i]++; + } + } + int a_min = (fft_type == Pffft::FftType::kReal) ? 5 : 4; + return factorization[0] >= a_min && n == 1; +} + +bool Pffft::IsSimdEnabled() { + return pffft_simd_size() > 1; +} + +std::unique_ptr<Pffft::FloatBuffer> Pffft::CreateBuffer() const { + // Cannot use make_unique from absl because Pffft is the only friend of + // Pffft::FloatBuffer.
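+ // Hence allocate with plain new; ownership transfers to the caller.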
+ std::unique_ptr<Pffft::FloatBuffer> buffer( + new Pffft::FloatBuffer(fft_size_, fft_type_)); + return buffer; +} + +void Pffft::ForwardTransform(const FloatBuffer& in, + FloatBuffer* out, + bool ordered) { + RTC_DCHECK_EQ(in.size(), GetBufferSize(fft_size_, fft_type_)); + RTC_DCHECK_EQ(in.size(), out->size()); + RTC_DCHECK(scratch_buffer_); + if (ordered) { + pffft_transform_ordered(pffft_status_, in.const_data(), out->data(), + scratch_buffer_, PFFFT_FORWARD); + } else { + pffft_transform(pffft_status_, in.const_data(), out->data(), + scratch_buffer_, PFFFT_FORWARD); + } +} + +void Pffft::BackwardTransform(const FloatBuffer& in, + FloatBuffer* out, + bool ordered) { + RTC_DCHECK_EQ(in.size(), GetBufferSize(fft_size_, fft_type_)); + RTC_DCHECK_EQ(in.size(), out->size()); + RTC_DCHECK(scratch_buffer_); + if (ordered) { + pffft_transform_ordered(pffft_status_, in.const_data(), out->data(), + scratch_buffer_, PFFFT_BACKWARD); + } else { + pffft_transform(pffft_status_, in.const_data(), out->data(), + scratch_buffer_, PFFFT_BACKWARD); + } +} + +void Pffft::FrequencyDomainConvolve(const FloatBuffer& fft_x, + const FloatBuffer& fft_y, + FloatBuffer* out, + float scaling) { + RTC_DCHECK_EQ(fft_x.size(), GetBufferSize(fft_size_, fft_type_)); + RTC_DCHECK_EQ(fft_x.size(), fft_y.size()); + RTC_DCHECK_EQ(fft_x.size(), out->size()); + pffft_zconvolve_accumulate(pffft_status_, fft_x.const_data(), + fft_y.const_data(), out->data(), scaling); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.h b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.h new file mode 100644 index 0000000000..983c2fd1bc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_PFFFT_WRAPPER_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_PFFFT_WRAPPER_H_ + +#include <memory> + +#include "api/array_view.h" + +// Forward declaration. +struct PFFFT_Setup; + +namespace webrtc { + +// Pretty-Fast Fast Fourier Transform (PFFFT) wrapper class. +// Not thread safe. +class Pffft { + public: + enum class FftType { kReal, kComplex }; + + // 1D floating point buffer used as input/output data type for the FFT ops. + // It must be constructed using Pffft::CreateBuffer(). + class FloatBuffer { + public: + FloatBuffer(const FloatBuffer&) = delete; + FloatBuffer& operator=(const FloatBuffer&) = delete; + ~FloatBuffer(); + + rtc::ArrayView<const float> GetConstView() const; + rtc::ArrayView<float> GetView(); + + private: + friend class Pffft; + FloatBuffer(size_t fft_size, FftType fft_type); + const float* const_data() const { return data_; } + float* data() { return data_; } + size_t size() const { return size_; } + + const size_t size_; + float* const data_; + }; + + // TODO(https://crbug.com/webrtc/9577): Consider adding a factory and making + // the ctor private. + // static std::unique_ptr<Pffft> Create(size_t fft_size, + // FftType fft_type); + + // Ctor. `fft_size` must be a supported size (see Pffft::IsValidFftSize()). + // If not supported, the code will crash.
+ Pffft(size_t fft_size, FftType fft_type); + Pffft(const Pffft&) = delete; + Pffft& operator=(const Pffft&) = delete; + ~Pffft(); + + // Returns true if the FFT size is supported. + static bool IsValidFftSize(size_t fft_size, FftType fft_type); + + // Returns true if SIMD code optimizations are being used. + static bool IsSimdEnabled(); + + // Creates a buffer of the right size. + std::unique_ptr<FloatBuffer> CreateBuffer() const; + + // TODO(https://crbug.com/webrtc/9577): Overload with rtc::ArrayView args. + // Computes the forward fast Fourier transform. + void ForwardTransform(const FloatBuffer& in, FloatBuffer* out, bool ordered); + // Computes the backward fast Fourier transform. + void BackwardTransform(const FloatBuffer& in, FloatBuffer* out, bool ordered); + + // Multiplies the frequency components of `fft_x` and `fft_y` and accumulates + // them into `out`. The arrays must have been obtained with + // ForwardTransform(..., /*ordered=*/false) - i.e., `fft_x` and `fft_y` must + // not be ordered. + void FrequencyDomainConvolve(const FloatBuffer& fft_x, + const FloatBuffer& fft_y, + FloatBuffer* out, + float scaling = 1.f); + + private: + const size_t fft_size_; + const FftType fft_type_; + PFFFT_Setup* pffft_status_; + float* const scratch_buffer_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_PFFFT_WRAPPER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_gn/moz.build new file mode 100644 index 0000000000..02898359bf --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_gn/moz.build @@ -0,0 +1,221 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "!/third_party/libwebrtc/gen", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + 
DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("pffft_wrapper_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_unittest.cc b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_unittest.cc new file mode 100644 index 0000000000..2ad6849cd4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/utility/pffft_wrapper_unittest.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/utility/pffft_wrapper.h" + +#include <algorithm> +#include <cstdlib> +#include <memory> + +#include "test/gtest.h" +#include "third_party/pffft/src/pffft.h" + +namespace webrtc { +namespace test { +namespace { + +constexpr size_t kMaxValidSizeCheck = 1024; + +static constexpr int kFftSizes[] = { + 16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5 * 96, 512, + 576, 5 * 128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864}; + +void CreatePffftWrapper(size_t fft_size, Pffft::FftType fft_type) { + Pffft pffft_wrapper(fft_size, fft_type); +} + +float* AllocateScratchBuffer(size_t fft_size, bool complex_fft) { + return static_cast<float*>( + pffft_aligned_malloc(fft_size * (complex_fft ?
2 : 1) * sizeof(float))); +} + +double frand() { + return std::rand() / static_cast<double>(RAND_MAX); +} + +void ExpectArrayViewsEquality(rtc::ArrayView<const float> a, + rtc::ArrayView<const float> b) { + ASSERT_EQ(a.size(), b.size()); + for (size_t i = 0; i < a.size(); ++i) { + SCOPED_TRACE(i); + EXPECT_EQ(a[i], b[i]); + } +} + +// Compares the output of the PFFFT C++ wrapper to that of the C PFFFT. +// Bit-exactness is expected. +void PffftValidateWrapper(size_t fft_size, bool complex_fft) { + // Always use the same seed to avoid flakiness. + std::srand(0); + + // Init PFFFT. + PFFFT_Setup* pffft_status = + pffft_new_setup(fft_size, complex_fft ? PFFFT_COMPLEX : PFFFT_REAL); + ASSERT_TRUE(pffft_status) << "FFT size (" << fft_size << ") not supported."; + size_t num_floats = fft_size * (complex_fft ? 2 : 1); + int num_bytes = static_cast<int>(num_floats) * sizeof(float); + float* in = static_cast<float*>(pffft_aligned_malloc(num_bytes)); + float* out = static_cast<float*>(pffft_aligned_malloc(num_bytes)); + float* scratch = AllocateScratchBuffer(fft_size, complex_fft); + + // Init PFFFT C++ wrapper. + Pffft::FftType fft_type = + complex_fft ? Pffft::FftType::kComplex : Pffft::FftType::kReal; + ASSERT_TRUE(Pffft::IsValidFftSize(fft_size, fft_type)); + Pffft pffft_wrapper(fft_size, fft_type); + auto in_wrapper = pffft_wrapper.CreateBuffer(); + auto out_wrapper = pffft_wrapper.CreateBuffer(); + + // Input and output buffers views. + rtc::ArrayView<float> in_view(in, num_floats); + rtc::ArrayView<float> out_view(out, num_floats); + auto in_wrapper_view = in_wrapper->GetView(); + EXPECT_EQ(in_wrapper_view.size(), num_floats); + auto out_wrapper_view = out_wrapper->GetConstView(); + EXPECT_EQ(out_wrapper_view.size(), num_floats); + + // Random input data. + for (size_t i = 0; i < num_floats; ++i) { + in_wrapper_view[i] = in[i] = static_cast<float>(frand() * 2.0 - 1.0); + } + + // Forward transform. + pffft_transform(pffft_status, in, out, scratch, PFFFT_FORWARD); + pffft_wrapper.ForwardTransform(*in_wrapper, out_wrapper.get(), + /*ordered=*/false); + ExpectArrayViewsEquality(out_view, out_wrapper_view); + + // Copy the FFT results into the input buffers to compute the backward FFT. + std::copy(out_view.begin(), out_view.end(), in_view.begin()); + std::copy(out_wrapper_view.begin(), out_wrapper_view.end(), + in_wrapper_view.begin()); + + // Backward transform.
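+ // Note: PFFFT transforms are unscaled, but both code paths share the same + // convention, so the outputs can still be compared bit-exactly.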
+ pffft_transform(pffft_status, in, out, scratch, PFFFT_BACKWARD); + pffft_wrapper.BackwardTransform(*in_wrapper, out_wrapper.get(), + /*ordered=*/false); + ExpectArrayViewsEquality(out_view, out_wrapper_view); + + pffft_destroy_setup(pffft_status); + pffft_aligned_free(in); + pffft_aligned_free(out); + pffft_aligned_free(scratch); +} + +} // namespace + +TEST(PffftTest, CreateWrapperWithValidSize) { + for (size_t fft_size = 0; fft_size < kMaxValidSizeCheck; ++fft_size) { + SCOPED_TRACE(fft_size); + if (Pffft::IsValidFftSize(fft_size, Pffft::FftType::kReal)) { + CreatePffftWrapper(fft_size, Pffft::FftType::kReal); + } + if (Pffft::IsValidFftSize(fft_size, Pffft::FftType::kComplex)) { + CreatePffftWrapper(fft_size, Pffft::FftType::kComplex); + } + } +} + +#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +class PffftInvalidSizeDeathTest : public ::testing::Test, + public ::testing::WithParamInterface<size_t> { +}; + +TEST_P(PffftInvalidSizeDeathTest, DoNotCreateRealWrapper) { + size_t fft_size = GetParam(); + ASSERT_FALSE(Pffft::IsValidFftSize(fft_size, Pffft::FftType::kReal)); + EXPECT_DEATH(CreatePffftWrapper(fft_size, Pffft::FftType::kReal), ""); +} + +TEST_P(PffftInvalidSizeDeathTest, DoNotCreateComplexWrapper) { + size_t fft_size = GetParam(); + ASSERT_FALSE(Pffft::IsValidFftSize(fft_size, Pffft::FftType::kComplex)); + EXPECT_DEATH(CreatePffftWrapper(fft_size, Pffft::FftType::kComplex), ""); +} + +INSTANTIATE_TEST_SUITE_P(PffftTest, + PffftInvalidSizeDeathTest, + ::testing::Values(17, + 33, + 65, + 97, + 129, + 161, + 193, + 257, + 289, + 385, + 481, + 513, + 577, + 641, + 801, + 865, + 1025)); + +#endif + +// TODO(https://crbug.com/webrtc/9577): Enable once SIMD is always enabled. +TEST(PffftTest, DISABLED_CheckSimd) { + EXPECT_TRUE(Pffft::IsSimdEnabled()); +} + +TEST(PffftTest, FftBitExactness) { + for (int fft_size : kFftSizes) { + SCOPED_TRACE(fft_size); + if (fft_size != 16) { + PffftValidateWrapper(fft_size, /*complex_fft=*/false); + } + PffftValidateWrapper(fft_size, /*complex_fft=*/true); + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/vad/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/vad/BUILD.gn new file mode 100644 index 0000000000..71e079d3a3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/BUILD.gn @@ -0,0 +1,69 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree.
+ +import("../../../webrtc.gni") +rtc_library("vad") { + visibility = [ + "../*", + "../../../rtc_tools:*", + ] + sources = [ + "common.h", + "gmm.cc", + "gmm.h", + "noise_gmm_tables.h", + "pitch_based_vad.cc", + "pitch_based_vad.h", + "pitch_internal.cc", + "pitch_internal.h", + "pole_zero_filter.cc", + "pole_zero_filter.h", + "standalone_vad.cc", + "standalone_vad.h", + "vad_audio_proc.cc", + "vad_audio_proc.h", + "vad_audio_proc_internal.h", + "vad_circular_buffer.cc", + "vad_circular_buffer.h", + "voice_activity_detector.cc", + "voice_activity_detector.h", + "voice_gmm_tables.h", + ] + deps = [ + "../../../audio/utility:audio_frame_operations", + "../../../common_audio", + "../../../common_audio:common_audio_c", + "../../../common_audio/third_party/ooura:fft_size_256", + "../../../rtc_base:checks", + "../../audio_coding:isac_vad", + ] +} + +if (rtc_include_tests) { + rtc_library("vad_unittests") { + testonly = true + sources = [ + "gmm_unittest.cc", + "pitch_based_vad_unittest.cc", + "pitch_internal_unittest.cc", + "pole_zero_filter_unittest.cc", + "standalone_vad_unittest.cc", + "vad_audio_proc_unittest.cc", + "vad_circular_buffer_unittest.cc", + "voice_activity_detector_unittest.cc", + ] + deps = [ + ":vad", + "../../../common_audio", + "../../../test:fileutils", + "../../../test:test_support", + "//testing/gmock", + "//testing/gtest", + ] + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/vad/common.h b/third_party/libwebrtc/modules/audio_processing/vad/common.h new file mode 100644 index 0000000000..b5a5fb385b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/common.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_VAD_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_VAD_COMMON_H_ + +#include <stddef.h> + +static const int kSampleRateHz = 16000; +static const size_t kLength10Ms = kSampleRateHz / 100; +static const size_t kMaxNumFrames = 4; + +struct AudioFeatures { + double log_pitch_gain[kMaxNumFrames]; + double pitch_lag_hz[kMaxNumFrames]; + double spectral_peak[kMaxNumFrames]; + double rms[kMaxNumFrames]; + size_t num_frames; + bool silence; +}; + +#endif // MODULES_AUDIO_PROCESSING_VAD_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/vad/gmm.cc b/third_party/libwebrtc/modules/audio_processing/vad/gmm.cc new file mode 100644 index 0000000000..3b8764c4d0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/gmm.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/vad/gmm.h" + +#include <math.h> + +namespace webrtc { + +static const int kMaxDimension = 10; + +static void RemoveMean(const double* in, + const double* mean_vec, + int dimension, + double* out) { + for (int n = 0; n < dimension; ++n) + out[n] = in[n] - mean_vec[n]; +} + +static double ComputeExponent(const double* in, + const double* covar_inv, + int dimension) { + double q = 0; + for (int i = 0; i < dimension; ++i) { + double v = 0; + for (int j = 0; j < dimension; j++) + v += (*covar_inv++) * in[j]; + q += v * in[i]; + } + q *= -0.5; + return q; +} + +double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) { + if (gmm_parameters.dimension > kMaxDimension) { + return -1; // This is an invalid pdf value, so the caller can check for it. + } + double f = 0; + double v[kMaxDimension]; + const double* mean_vec = gmm_parameters.mean; + const double* covar_inv = gmm_parameters.covar_inverse; + + for (int n = 0; n < gmm_parameters.num_mixtures; n++) { + RemoveMean(x, mean_vec, gmm_parameters.dimension, v); + double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) + + gmm_parameters.weight[n]; + f += exp(q); + mean_vec += gmm_parameters.dimension; + covar_inv += gmm_parameters.dimension * gmm_parameters.dimension; + } + return f; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/vad/gmm.h b/third_party/libwebrtc/modules/audio_processing/vad/gmm.h new file mode 100644 index 0000000000..d9d68ecfdc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/gmm.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_VAD_GMM_H_ +#define MODULES_AUDIO_PROCESSING_VAD_GMM_H_ + +namespace webrtc { + +// A structure that specifies a GMM. +// A GMM is formulated as +// f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... + +// w[num_mixtures - 1] * mixture[num_mixtures - 1]; +// Where a 'mixture' is a Gaussian density. + +struct GmmParameters { + // weight[n] = log(w[n]) - `dimension`/2 * log(2*pi) - 1/2 * log(det(cov[n])); + // where cov[n] is the covariance matrix of mixture n; + const double* weight; + // pointer to the first element of a `num_mixtures`x`dimension` matrix + // where kth row is the mean of the kth mixture. + const double* mean; + // pointer to the first element of a `num_mixtures`x`dimension`x`dimension` + // 3D-matrix, where the kth 2D-matrix is the inverse of the covariance + // matrix of the kth mixture. + const double* covar_inverse; + // Dimensionality of the mixtures. + int dimension; + // number of the mixtures. + int num_mixtures; +}; + +// Evaluates the given GMM, according to `gmm_parameters`, at the given point +// `x`. If the dimensionality of the given GMM is larger than the maximum +// dimension accepted by this function, -1 is returned.
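+// +// With `weight` stored in the log domain (see above), each mixture contributes +// exp(weight[n] - 0.5 * (x - mean[n])' * covar_inverse[n] * (x - mean[n])) +// to the returned sum.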
+double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters); + +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_VAD_GMM_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/vad/gmm_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/gmm_unittest.cc new file mode 100644 index 0000000000..d895afab7b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/gmm_unittest.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/gmm.h" + +#include <math.h> + +#include "modules/audio_processing/vad/noise_gmm_tables.h" +#include "modules/audio_processing/vad/voice_gmm_tables.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(GmmTest, EvaluateGmm) { + GmmParameters noise_gmm; + GmmParameters voice_gmm; + + // Setup noise GMM. + noise_gmm.dimension = kNoiseGmmDim; + noise_gmm.num_mixtures = kNoiseGmmNumMixtures; + noise_gmm.weight = kNoiseGmmWeights; + noise_gmm.mean = &kNoiseGmmMean[0][0]; + noise_gmm.covar_inverse = &kNoiseGmmCovarInverse[0][0][0]; + + // Setup voice GMM. + voice_gmm.dimension = kVoiceGmmDim; + voice_gmm.num_mixtures = kVoiceGmmNumMixtures; + voice_gmm.weight = kVoiceGmmWeights; + voice_gmm.mean = &kVoiceGmmMean[0][0]; + voice_gmm.covar_inverse = &kVoiceGmmCovarInverse[0][0][0]; + + // Test vectors. These are the mean of the GMM means. + const double kXVoice[kVoiceGmmDim] = {-1.35893162459863, 602.862491970368, + 178.022069191324}; + const double kXNoise[kNoiseGmmDim] = {-2.33443722724409, 2827.97828765184, + 141.114178166812}; + + // Expected pdf values. These values are computed in MATLAB using EvalGmm.m. + const double kPdfNoise = 1.88904409403101e-07; + const double kPdfVoice = 1.30453996982266e-06; + + // Relative error should be smaller than the following value. + const double kAcceptedRelativeErr = 1e-10; + + // Test Voice. + double pdf = EvaluateGmm(kXVoice, voice_gmm); + EXPECT_GT(pdf, 0); + double relative_error = fabs(pdf - kPdfVoice) / kPdfVoice; + EXPECT_LE(relative_error, kAcceptedRelativeErr); + + // Test Noise. + pdf = EvaluateGmm(kXNoise, noise_gmm); + EXPECT_GT(pdf, 0); + relative_error = fabs(pdf - kPdfNoise) / kPdfNoise; + EXPECT_LE(relative_error, kAcceptedRelativeErr); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/vad/noise_gmm_tables.h b/third_party/libwebrtc/modules/audio_processing/vad/noise_gmm_tables.h new file mode 100644 index 0000000000..944a5401cc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/noise_gmm_tables.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// GMM tables for inactive segments. Generated by MakeGmmTables.m.
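+// Each mixture is stored as a log-domain weight, a mean vector and an inverse
+// covariance matrix; see GmmParameters in gmm.h.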
+ +#ifndef MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ +#define MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ + +namespace webrtc { + +static const int kNoiseGmmNumMixtures = 12; +static const int kNoiseGmmDim = 3; + +static const double + kNoiseGmmCovarInverse[kNoiseGmmNumMixtures][kNoiseGmmDim][kNoiseGmmDim] = { + {{7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02}, + {4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04}, + {1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}}, + {{8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03}, + {-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04}, + {5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}}, + {{4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03}, + {-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05}, + {-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}}, + {{9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03}, + {-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07}, + {-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}}, + {{7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02}, + {-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06}, + {2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}}, + {{8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02}, + {-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06}, + {-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}}, + {{9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03}, + {5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07}, + {-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}}, + {{8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03}, + {5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07}, + {6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}}, + {{6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03}, + {-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05}, + {5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}}, + {{6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03}, + {4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08}, + {-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}}, + {{1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02}, + {-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07}, + {-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}}, + {{4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03}, + {-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07}, + {5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}}; + +static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = { + {-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01}, + {-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02}, + {-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02}, + {-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02}, + {-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01}, + {-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02}, + {-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02}, + {-2.19290322814343e+00, 3.98325506609408e+03, 
2.13249167359934e+02}, + {-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02}, + {-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02}, + {-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02}, + {-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}}; + +static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = { + -1.09422832086193e+01, -1.10847897513425e+01, -1.36767587732187e+01, + -1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01, + -1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01, + -1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc new file mode 100644 index 0000000000..68e60dc66a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/pitch_based_vad.h" + +#include <math.h> + +#include "modules/audio_processing/vad/common.h" +#include "modules/audio_processing/vad/noise_gmm_tables.h" +#include "modules/audio_processing/vad/vad_circular_buffer.h" +#include "modules/audio_processing/vad/voice_gmm_tables.h" + +namespace webrtc { + +static_assert(kNoiseGmmDim == kVoiceGmmDim, + "noise and voice gmm dimension not equal"); + +// These values should match MATLAB counterparts for unit-tests to pass. +static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames. +static const double kInitialPriorProbability = 0.3; +static const int kTransientWidthThreshold = 7; +static const double kLowProbabilityThreshold = 0.2; + +static double LimitProbability(double p) { + const double kLimHigh = 0.99; + const double kLimLow = 0.01; + + if (p > kLimHigh) + p = kLimHigh; + else if (p < kLimLow) + p = kLimLow; + return p; +} + +PitchBasedVad::PitchBasedVad() + : p_prior_(kInitialPriorProbability), + circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) { + // Setup noise GMM. + noise_gmm_.dimension = kNoiseGmmDim; + noise_gmm_.num_mixtures = kNoiseGmmNumMixtures; + noise_gmm_.weight = kNoiseGmmWeights; + noise_gmm_.mean = &kNoiseGmmMean[0][0]; + noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0]; + + // Setup voice GMM. + voice_gmm_.dimension = kVoiceGmmDim; + voice_gmm_.num_mixtures = kVoiceGmmNumMixtures; + voice_gmm_.weight = kVoiceGmmWeights; + voice_gmm_.mean = &kVoiceGmmMean[0][0]; + voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0]; +} + +PitchBasedVad::~PitchBasedVad() {} + +int PitchBasedVad::VoicingProbability(const AudioFeatures& features, + double* p_combined) { + double p; + double gmm_features[3]; + double pdf_features_given_voice; + double pdf_features_given_noise; + // These limits are the same as in the MATLAB implementation 'VoicingProbGMM()'.
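+ // Features outside these limits scale one of the pdfs by `kEps` below, effectively deciding the frame.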
+  const double kLimLowLogPitchGain = -2.0;
+  const double kLimHighLogPitchGain = -0.9;
+  const double kLimLowSpectralPeak = 200;
+  const double kLimHighSpectralPeak = 2000;
+  const double kEps = 1e-12;
+  for (size_t n = 0; n < features.num_frames; n++) {
+    gmm_features[0] = features.log_pitch_gain[n];
+    gmm_features[1] = features.spectral_peak[n];
+    gmm_features[2] = features.pitch_lag_hz[n];
+
+    pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
+    pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
+
+    if (features.spectral_peak[n] < kLimLowSpectralPeak ||
+        features.spectral_peak[n] > kLimHighSpectralPeak ||
+        features.log_pitch_gain[n] < kLimLowLogPitchGain) {
+      pdf_features_given_voice = kEps * pdf_features_given_noise;
+    } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
+      pdf_features_given_noise = kEps * pdf_features_given_voice;
+    }
+
+    p = p_prior_ * pdf_features_given_voice /
+        (pdf_features_given_voice * p_prior_ +
+         pdf_features_given_noise * (1 - p_prior_));
+
+    p = LimitProbability(p);
+
+    // Combine the pitch-based probability with the standalone probability
+    // before updating the prior probabilities.
+    double prod_active = p * p_combined[n];
+    double prod_inactive = (1 - p) * (1 - p_combined[n]);
+    p_combined[n] = prod_active / (prod_active + prod_inactive);
+
+    if (UpdatePrior(p_combined[n]) < 0)
+      return -1;
+    // Limit the prior probability. With a zero prior probability the
+    // posterior probability is always zero.
+    p_prior_ = LimitProbability(p_prior_);
+  }
+  return 0;
+}
+
+int PitchBasedVad::UpdatePrior(double p) {
+  circular_buffer_->Insert(p);
+  if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
+                                        kLowProbabilityThreshold) < 0)
+    return -1;
+  p_prior_ = circular_buffer_->Mean();
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.h b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.h
new file mode 100644
index 0000000000..fa3abc2d28
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.h
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
+#define MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
+
+#include <memory>
+
+#include "modules/audio_processing/vad/common.h"
+#include "modules/audio_processing/vad/gmm.h"
+
+namespace webrtc {
+
+class VadCircularBuffer;
+
+// Computes the probability of the input audio frame to be active given
+// the corresponding pitch-gain and lag of the frame.
+class PitchBasedVad {
+ public:
+  PitchBasedVad();
+  ~PitchBasedVad();
+
+  // Compute pitch-based voicing probability, given the features.
+  // features: a structure containing features required for computing voicing
+  //           probabilities.
+  //
+  // p_combined: an array which contains the combined activity probabilities
+  //             computed prior to the call of this function. The method then
+  //             computes the voicing probabilities and combines them with the
+  //             given values. The results are returned in `p_combined`.
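+  //
+  // Returns 0 on success and -1 on failure (propagated from the circular
+  // buffer used to update the prior probability).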
+  int VoicingProbability(const AudioFeatures& features, double* p_combined);
+
+ private:
+  int UpdatePrior(double p);
+
+  // TODO(turajs): maybe define this at a higher level (e.g. as an enum) so
+  // that all the code recognizes it as "no-error."
+  static const int kNoError = 0;
+
+  GmmParameters noise_gmm_;
+  GmmParameters voice_gmm_;
+
+  double p_prior_;
+
+  std::unique_ptr<VadCircularBuffer> circular_buffer_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc
new file mode 100644
index 0000000000..4a8331a769
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc
@@ -0,0 +1,75 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pitch_based_vad.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+TEST(PitchBasedVadTest, VoicingProbabilityTest) {
+  std::string spectral_peak_file_name =
+      test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat");
+  FILE* spectral_peak_file = fopen(spectral_peak_file_name.c_str(), "rb");
+  ASSERT_TRUE(spectral_peak_file != NULL);
+
+  std::string pitch_gain_file_name =
+      test::ResourcePath("audio_processing/agc/agc_pitch_gain", "dat");
+  FILE* pitch_gain_file = fopen(pitch_gain_file_name.c_str(), "rb");
+  ASSERT_TRUE(pitch_gain_file != NULL);
+
+  std::string pitch_lag_file_name =
+      test::ResourcePath("audio_processing/agc/agc_pitch_lag", "dat");
+  FILE* pitch_lag_file = fopen(pitch_lag_file_name.c_str(), "rb");
+  ASSERT_TRUE(pitch_lag_file != NULL);
+
+  std::string voicing_prob_file_name =
+      test::ResourcePath("audio_processing/agc/agc_voicing_prob", "dat");
+  FILE* voicing_prob_file = fopen(voicing_prob_file_name.c_str(), "rb");
+  ASSERT_TRUE(voicing_prob_file != NULL);
+
+  PitchBasedVad vad_;
+
+  double reference_activity_probability;
+
+  AudioFeatures audio_features;
+  memset(&audio_features, 0, sizeof(audio_features));
+  audio_features.num_frames = 1;
+  while (fread(audio_features.spectral_peak,
+               sizeof(audio_features.spectral_peak[0]), 1,
+               spectral_peak_file) == 1u) {
+    double p;
+    ASSERT_EQ(1u, fread(audio_features.log_pitch_gain,
+                        sizeof(audio_features.log_pitch_gain[0]), 1,
+                        pitch_gain_file));
+    ASSERT_EQ(1u,
+              fread(audio_features.pitch_lag_hz,
+                    sizeof(audio_features.pitch_lag_hz[0]), 1, pitch_lag_file));
+    ASSERT_EQ(1u, fread(&reference_activity_probability,
+                        sizeof(reference_activity_probability), 1,
+                        voicing_prob_file));
+
+    p = 0.5;  // Initialize to the neutral value for combining probabilities.
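+    // With 0.5 as the combined value, a * b / (a * b + (1 - a) * (1 - b))
+    // reduces to a, so the reference is compared against the pitch-based
+    // probability alone.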
+    EXPECT_EQ(0, vad_.VoicingProbability(audio_features, &p));
+    EXPECT_NEAR(p, reference_activity_probability, 0.01);
+  }
+
+  fclose(voicing_prob_file);
+  fclose(spectral_peak_file);
+  fclose(pitch_gain_file);
+  fclose(pitch_lag_file);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc
new file mode 100644
index 0000000000..8f86918644
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pitch_internal.h"
+
+#include <cmath>
+
+namespace webrtc {
+
+// A 4-to-3 linear interpolation.
+// The interpolation constants are derived as follows:
+// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
+// we are interested in pitch parameters of 0-5 ms, 10-15 ms and 20-25 ms.
+// This is like interpolating 4-to-6 and keeping the odd samples.
+// The reason behind this is that LPC coefficients are computed for the first
+// half of each 10 ms interval.
+static void PitchInterpolation(double old_val, const double* in, double* out) {
+  out[0] = 1. / 6. * old_val + 5. / 6. * in[0];
+  out[1] = 5. / 6. * in[1] + 1. / 6. * in[2];
+  out[2] = 0.5 * in[2] + 0.5 * in[3];
+}
+
+void GetSubframesPitchParameters(int sampling_rate_hz,
+                                 double* gains,
+                                 double* lags,
+                                 int num_in_frames,
+                                 int num_out_frames,
+                                 double* log_old_gain,
+                                 double* old_lag,
+                                 double* log_pitch_gain,
+                                 double* pitch_lag_hz) {
+  // Gain interpolation is done in the log domain, and gains are also returned
+  // in the log domain.
+  for (int n = 0; n < num_in_frames; n++)
+    gains[n] = log(gains[n] + 1e-12);
+
+  // Interpolate lags and gains.
+  PitchInterpolation(*log_old_gain, gains, log_pitch_gain);
+  *log_old_gain = gains[num_in_frames - 1];
+  PitchInterpolation(*old_lag, lags, pitch_lag_hz);
+  *old_lag = lags[num_in_frames - 1];
+
+  // Convert pitch lags to Hertz.
+  for (int n = 0; n < num_out_frames; n++) {
+    pitch_lag_hz[n] = (sampling_rate_hz) / (pitch_lag_hz[n]);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.h b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.h
new file mode 100644
index 0000000000..e382c1fbde
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.h
@@ -0,0 +1,30 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
+#define MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
+
+namespace webrtc {
+
+// TODO(turajs): Write a description of this function. Also be consistent with
+// usage of `sampling_rate_hz` vs `kSamplingFreqHz`.
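+//
+// Interpolates `num_in_frames` pitch gains and lags (estimated over 7.5 ms
+// frames) down to `num_out_frames` 10 ms subframes, converting the gains to
+// the log domain and the lags to Hertz; `log_old_gain` and `old_lag` carry
+// the interpolation state between calls.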
+void GetSubframesPitchParameters(int sampling_rate_hz, + double* gains, + double* lags, + int num_in_frames, + int num_out_frames, + double* log_old_gain, + double* old_lag, + double* log_pitch_gain, + double* pitch_lag_hz); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal_unittest.cc new file mode 100644 index 0000000000..c851421ba7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal_unittest.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/pitch_internal.h" + +#include + +#include "test/gtest.h" + +namespace webrtc { + +TEST(PitchInternalTest, test) { + const int kSamplingRateHz = 8000; + const int kNumInputParameters = 4; + const int kNumOutputParameters = 3; + // Inputs + double log_old_gain = log(0.5); + double gains[] = {0.6, 0.2, 0.5, 0.4}; + + double old_lag = 70; + double lags[] = {90, 111, 122, 50}; + + // Expected outputs + double expected_log_pitch_gain[] = {-0.541212549898316, -1.45672279045507, + -0.80471895621705}; + double expected_log_old_gain = log(gains[kNumInputParameters - 1]); + + double expected_pitch_lag_hz[] = {92.3076923076923, 70.9010339734121, + 93.0232558139535}; + double expected_old_lag = lags[kNumInputParameters - 1]; + + double log_pitch_gain[kNumOutputParameters]; + double pitch_lag_hz[kNumInputParameters]; + + GetSubframesPitchParameters(kSamplingRateHz, gains, lags, kNumInputParameters, + kNumOutputParameters, &log_old_gain, &old_lag, + log_pitch_gain, pitch_lag_hz); + + for (int n = 0; n < 3; n++) { + EXPECT_NEAR(pitch_lag_hz[n], expected_pitch_lag_hz[n], 1e-6); + EXPECT_NEAR(log_pitch_gain[n], expected_log_pitch_gain[n], 1e-8); + } + EXPECT_NEAR(old_lag, expected_old_lag, 1e-6); + EXPECT_NEAR(log_old_gain, expected_log_old_gain, 1e-8); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc new file mode 100644 index 0000000000..e7a611309c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/vad/pole_zero_filter.h"
+
+#include <string.h>
+
+#include <algorithm>
+
+namespace webrtc {
+
+PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
+                                       size_t order_numerator,
+                                       const float* denominator_coefficients,
+                                       size_t order_denominator) {
+  if (order_numerator > kMaxFilterOrder ||
+      order_denominator > kMaxFilterOrder || denominator_coefficients[0] == 0 ||
+      numerator_coefficients == NULL || denominator_coefficients == NULL)
+    return NULL;
+  return new PoleZeroFilter(numerator_coefficients, order_numerator,
+                            denominator_coefficients, order_denominator);
+}
+
+PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
+                               size_t order_numerator,
+                               const float* denominator_coefficients,
+                               size_t order_denominator)
+    : past_input_(),
+      past_output_(),
+      numerator_coefficients_(),
+      denominator_coefficients_(),
+      order_numerator_(order_numerator),
+      order_denominator_(order_denominator),
+      highest_order_(std::max(order_denominator, order_numerator)) {
+  memcpy(numerator_coefficients_, numerator_coefficients,
+         sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
+  memcpy(denominator_coefficients_, denominator_coefficients,
+         sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));
+
+  if (denominator_coefficients_[0] != 1) {
+    for (size_t n = 0; n <= order_numerator_; n++)
+      numerator_coefficients_[n] /= denominator_coefficients_[0];
+    for (size_t n = 0; n <= order_denominator_; n++)
+      denominator_coefficients_[n] /= denominator_coefficients_[0];
+  }
+}
+
+template <typename T>
+static float FilterArPast(const T* past,
+                          size_t order,
+                          const float* coefficients) {
+  float sum = 0.0f;
+  size_t past_index = order - 1;
+  for (size_t k = 1; k <= order; k++, past_index--)
+    sum += coefficients[k] * past[past_index];
+  return sum;
+}
+
+int PoleZeroFilter::Filter(const int16_t* in,
+                           size_t num_input_samples,
+                           float* output) {
+  if (in == NULL || output == NULL)
+    return -1;
+  // This is the typical case, just a memcpy.
+  const size_t k = std::min(num_input_samples, highest_order_);
+  size_t n;
+  for (n = 0; n < k; n++) {
+    output[n] = in[n] * numerator_coefficients_[0];
+    output[n] += FilterArPast(&past_input_[n], order_numerator_,
+                              numerator_coefficients_);
+    output[n] -= FilterArPast(&past_output_[n], order_denominator_,
+                              denominator_coefficients_);
+
+    past_input_[n + order_numerator_] = in[n];
+    past_output_[n + order_denominator_] = output[n];
+  }
+  if (highest_order_ < num_input_samples) {
+    for (size_t m = 0; n < num_input_samples; n++, m++) {
+      output[n] = in[n] * numerator_coefficients_[0];
+      output[n] +=
+          FilterArPast(&in[m], order_numerator_, numerator_coefficients_);
+      output[n] -= FilterArPast(&output[m], order_denominator_,
+                                denominator_coefficients_);
+    }
+    // Record into the past signal.
+    memcpy(past_input_, &in[num_input_samples - order_numerator_],
+           sizeof(in[0]) * order_numerator_);
+    memcpy(past_output_, &output[num_input_samples - order_denominator_],
+           sizeof(output[0]) * order_denominator_);
+  } else {
+    // Odd case where the length of the input is shorter than the filter
+    // order.
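+    // The loop above already produced all of `output`; only the filter state
+    // needs updating. Shift the past buffers left by num_input_samples so the
+    // next call reads the correct recent history.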
+ memmove(past_input_, &past_input_[num_input_samples], + order_numerator_ * sizeof(past_input_[0])); + memmove(past_output_, &past_output_[num_input_samples], + order_denominator_ * sizeof(past_output_[0])); + } + return 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.h b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.h new file mode 100644 index 0000000000..11a05114d1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_ + +#include +#include + +namespace webrtc { + +class PoleZeroFilter { + public: + ~PoleZeroFilter() {} + + static PoleZeroFilter* Create(const float* numerator_coefficients, + size_t order_numerator, + const float* denominator_coefficients, + size_t order_denominator); + + int Filter(const int16_t* in, size_t num_input_samples, float* output); + + private: + PoleZeroFilter(const float* numerator_coefficients, + size_t order_numerator, + const float* denominator_coefficients, + size_t order_denominator); + + static const int kMaxFilterOrder = 24; + + int16_t past_input_[kMaxFilterOrder * 2]; + float past_output_[kMaxFilterOrder * 2]; + + float numerator_coefficients_[kMaxFilterOrder + 1]; + float denominator_coefficients_[kMaxFilterOrder + 1]; + + size_t order_numerator_; + size_t order_denominator_; + size_t highest_order_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc new file mode 100644 index 0000000000..8088b40125 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/vad/pole_zero_filter.h" + +#include +#include + +#include + +#include "modules/audio_processing/vad/vad_audio_proc_internal.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +static const int kInputSamples = 50; + +static const int16_t kInput[kInputSamples] = { + -2136, -7116, 10715, 2464, 3164, 8139, 11393, 24013, -32117, -5544, + -27740, 10181, 14190, -24055, -15912, 17393, 6359, -9950, -13894, 32432, + -23944, 3437, -8381, 19768, 3087, -19795, -5920, 13310, 1407, 3876, + 4059, 3524, -23130, 19121, -27900, -24840, 4089, 21422, -3625, 3015, + -11236, 28856, 13424, 6571, -19761, -6361, 15821, -9469, 29727, 32229}; + +static const float kReferenceOutput[kInputSamples] = { + -2082.230472f, -6878.572941f, 10697.090871f, 2358.373952f, + 2973.936512f, 7738.580650f, 10690.803213f, 22687.091576f, + -32676.684717f, -5879.621684f, -27359.297432f, 10368.735888f, + 13994.584604f, -23676.126249f, -15078.250390f, 17818.253338f, + 6577.743123f, -9498.369315f, -13073.651079f, 32460.026588f, + -23391.849347f, 3953.805667f, -7667.761363f, 19995.153447f, + 3185.575477f, -19207.365160f, -5143.103201f, 13756.317237f, + 1779.654794f, 4142.269755f, 4209.475034f, 3572.991789f, + -22509.089546f, 19307.878964f, -27060.439759f, -23319.042810f, + 5547.685267f, 22312.718676f, -2707.309027f, 3852.358490f, + -10135.510093f, 29241.509970f, 13394.397233f, 6340.721417f, + -19510.207905f, -5908.442086f, 15882.301634f, -9211.335255f, + 29253.056735f, 30874.443046f}; + +class PoleZeroFilterTest : public ::testing::Test { + protected: + PoleZeroFilterTest() + : my_filter_(PoleZeroFilter::Create(kCoeffNumerator, + kFilterOrder, + kCoeffDenominator, + kFilterOrder)) {} + + ~PoleZeroFilterTest() override {} + + void FilterSubframes(int num_subframes); + + private: + void TestClean(); + std::unique_ptr my_filter_; +}; + +void PoleZeroFilterTest::FilterSubframes(int num_subframes) { + float output[kInputSamples]; + const int num_subframe_samples = kInputSamples / num_subframes; + EXPECT_EQ(num_subframe_samples * num_subframes, kInputSamples); + + for (int n = 0; n < num_subframes; n++) { + my_filter_->Filter(&kInput[n * num_subframe_samples], num_subframe_samples, + &output[n * num_subframe_samples]); + } + for (int n = 0; n < kInputSamples; n++) { + EXPECT_NEAR(output[n], kReferenceOutput[n], 1); + } +} + +TEST_F(PoleZeroFilterTest, OneSubframe) { + FilterSubframes(1); +} + +TEST_F(PoleZeroFilterTest, TwoSubframes) { + FilterSubframes(2); +} + +TEST_F(PoleZeroFilterTest, FiveSubframes) { + FilterSubframes(5); +} + +TEST_F(PoleZeroFilterTest, TenSubframes) { + FilterSubframes(10); +} + +TEST_F(PoleZeroFilterTest, TwentyFiveSubframes) { + FilterSubframes(25); +} + +TEST_F(PoleZeroFilterTest, FiftySubframes) { + FilterSubframes(50); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc new file mode 100644 index 0000000000..1397668eb4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/standalone_vad.h"
+
+#include <string.h>
+
+#include "common_audio/vad/include/webrtc_vad.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+static const int kDefaultStandaloneVadMode = 3;
+
+StandaloneVad::StandaloneVad(VadInst* vad)
+    : vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {}
+
+StandaloneVad::~StandaloneVad() {
+  WebRtcVad_Free(vad_);
+}
+
+StandaloneVad* StandaloneVad::Create() {
+  VadInst* vad = WebRtcVad_Create();
+  if (!vad)
+    return nullptr;
+
+  int err = WebRtcVad_Init(vad);
+  err |= WebRtcVad_set_mode(vad, kDefaultStandaloneVadMode);
+  if (err != 0) {
+    WebRtcVad_Free(vad);
+    return nullptr;
+  }
+  return new StandaloneVad(vad);
+}
+
+int StandaloneVad::AddAudio(const int16_t* data, size_t length) {
+  if (length != kLength10Ms)
+    return -1;
+
+  if (index_ + length > kLength10Ms * kMaxNum10msFrames)
+    // Reset the buffer if it's full.
+    // TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
+    // can forgo the buffering.
+    index_ = 0;
+
+  memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
+  index_ += length;
+  return 0;
+}
+
+int StandaloneVad::GetActivity(double* p, size_t length_p) {
+  if (index_ == 0)
+    return -1;
+
+  const size_t num_frames = index_ / kLength10Ms;
+  if (num_frames > length_p)
+    return -1;
+  RTC_DCHECK_EQ(0, WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_));
+
+  int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
+  if (activity < 0)
+    return -1;
+  else if (activity == 0)
+    p[0] = 0.01;  // Arbitrary but small and non-zero.
+  else
+    p[0] = 0.5;  // 0.5 is the neutral value when combined with other
+                 // probabilities.
+  for (size_t n = 1; n < num_frames; n++)
+    p[n] = p[0];
+  // Reset the buffer to start from the beginning.
+  index_ = 0;
+  return activity;
+}
+
+int StandaloneVad::set_mode(int mode) {
+  if (mode < 0 || mode > 3)
+    return -1;
+  if (WebRtcVad_set_mode(vad_, mode) != 0)
+    return -1;
+
+  mode_ = mode;
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.h b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.h
new file mode 100644
index 0000000000..b08463374e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.h
@@ -0,0 +1,69 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
+#define MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common_audio/vad/include/webrtc_vad.h"
+#include "modules/audio_processing/vad/common.h"
+
+namespace webrtc {
+
+class StandaloneVad {
+ public:
+  static StandaloneVad* Create();
+  ~StandaloneVad();
+
+  // Outputs
+  // p: a buffer where probabilities are written to.
+  // length_p: number of elements of `p`.
+  //
+  // return value:
+  //    -1: if no audio is stored or the VAD returns an error.
+  //     0: in case of success.
+  // In case of error the content of `p` is unchanged.
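+  // Typical usage is one to three AddAudio() calls with 10 ms frames,
+  // followed by one GetActivity() call with `length_p` at least the number
+  // of frames pushed.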
+  //
+  // Note that due to a high false-positive rate (the VAD decision is active
+  // while the processed audio is just background noise), the stand-alone VAD
+  // is used as a one-sided indicator. The activity probability is 0.5 if the
+  // frame is classified as active, and the probability is 0.01 if the audio
+  // is classified as passive. In this way, when probabilities are combined,
+  // the effect of the stand-alone VAD is neutral if the input is classified
+  // as active.
+  int GetActivity(double* p, size_t length_p);
+
+  // Expects 10 ms of 16 kHz audio to be pushed in.
+  int AddAudio(const int16_t* data, size_t length);
+
+  // Set the aggressiveness of the VAD: 0 is the least aggressive and 3 is the
+  // most aggressive mode. Returns -1 if the input is less than 0 or larger
+  // than 3, otherwise 0 is returned.
+  int set_mode(int mode);
+  // Get the aggressiveness of the current VAD.
+  int mode() const { return mode_; }
+
+ private:
+  explicit StandaloneVad(VadInst* vad);
+
+  static const size_t kMaxNum10msFrames = 3;
+
+  // TODO(turajs): Is there a way to use a scoped pointer here?
+  VadInst* vad_;
+  int16_t buffer_[kMaxNum10msFrames * kLength10Ms];
+  size_t index_;
+  int mode_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad_unittest.cc
new file mode 100644
index 0000000000..0fa2ed78b1
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad_unittest.cc
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/standalone_vad.h"
+
+#include <stdio.h>
+
+#include <memory>
+
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+
+TEST(StandaloneVadTest, Api) {
+  std::unique_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  int16_t data[kLength10Ms] = {0};
+
+  // Valid frame length (for 32 kHz rate), but not what the VAD is expecting.
+  EXPECT_EQ(-1, vad->AddAudio(data, 320));
+
+  const size_t kMaxNumFrames = 3;
+  double p[kMaxNumFrames];
+  for (size_t n = 0; n < kMaxNumFrames; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+
+  // Pretend `p` is shorter than it should be.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames - 1));
+
+  EXPECT_EQ(0, vad->GetActivity(p, kMaxNumFrames));
+
+  // Ask for activity when the buffer is empty.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames));
+
+  // Should reset and result in one buffer.
+  for (size_t n = 0; n < kMaxNumFrames + 1; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+  EXPECT_EQ(0, vad->GetActivity(p, 1));
+
+  // Wrong modes.
+  EXPECT_EQ(-1, vad->set_mode(-1));
+  EXPECT_EQ(-1, vad->set_mode(4));
+
+  // Valid mode.
+  const int kMode = 2;
+  EXPECT_EQ(0, vad->set_mode(kMode));
+  EXPECT_EQ(kMode, vad->mode());
+}
+
+#if defined(WEBRTC_IOS)
+TEST(StandaloneVadTest, DISABLED_ActivityDetection) {
+#else
+TEST(StandaloneVadTest, ActivityDetection) {
+#endif
+  std::unique_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  const size_t kDataLength = kLength10Ms;
+  int16_t data[kDataLength] = {0};
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/agc/agc_audio", "pcm").c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != NULL);
+
+  FILE* reference_file = fopen(
+      test::ResourcePath("audio_processing/agc/agc_vad", "dat").c_str(), "rb");
+  ASSERT_TRUE(reference_file != NULL);
+
+  // Reference activities are prepared with 0 aggressiveness.
+  ASSERT_EQ(0, vad->set_mode(0));
+
+  // The stand-alone VAD can operate on 1, 2 or 3 frames of length 10 ms. The
+  // reference file is created for 30 ms frames.
+  const int kNumVadFramesToProcess = 3;
+  int num_frames = 0;
+  while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) {
+    vad->AddAudio(data, kDataLength);
+    num_frames++;
+    if (num_frames == kNumVadFramesToProcess) {
+      num_frames = 0;
+      int reference_activity;
+      double p[kNumVadFramesToProcess];
+      EXPECT_EQ(1u, fread(&reference_activity, sizeof(reference_activity), 1,
+                          reference_file));
+      int activity = vad->GetActivity(p, kNumVadFramesToProcess);
+      EXPECT_EQ(reference_activity, activity);
+      if (activity != 0) {
+        // When active, probabilities are set to 0.5.
+        for (int n = 0; n < kNumVadFramesToProcess; n++)
+          EXPECT_EQ(0.5, p[n]);
+      } else {
+        // When inactive, probabilities are set to 0.01.
+        for (int n = 0; n < kNumVadFramesToProcess; n++)
+          EXPECT_EQ(0.01, p[n]);
+      }
+    }
+  }
+  fclose(reference_file);
+  fclose(pcm_file);
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc
new file mode 100644
index 0000000000..aaf8214d7c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc
@@ -0,0 +1,275 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/vad_audio_proc.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
+#include "modules/audio_processing/vad/pitch_internal.h"
+#include "modules/audio_processing/vad/pole_zero_filter.h"
+#include "modules/audio_processing/vad/vad_audio_proc_internal.h"
+#include "rtc_base/checks.h"
+extern "C" {
+#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
+#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
+#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
+#include "modules/audio_coding/codecs/isac/main/source/structs.h"
+}
+
+namespace webrtc {
+
+// The following structures are declared anonymous in iSAC's structs.h. To
+// forward declare them, we use this derived class trick.
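+// (The library typedefs are of unnamed structs, so they cannot be forward
+// declared directly; deriving a named struct from each gives the header a
+// name it can declare.)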
+struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {}; +struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {}; + +static constexpr float kFrequencyResolution = + kSampleRateHz / static_cast(VadAudioProc::kDftSize); +static constexpr int kSilenceRms = 5; + +// TODO(turajs): Make a Create or Init for VadAudioProc. +VadAudioProc::VadAudioProc() + : audio_buffer_(), + num_buffer_samples_(kNumPastSignalSamples), + log_old_gain_(-2), + old_lag_(50), // Arbitrary but valid as pitch-lag (in samples). + pitch_analysis_handle_(new PitchAnalysisStruct), + pre_filter_handle_(new PreFiltBankstr), + high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator, + kFilterOrder, + kCoeffDenominator, + kFilterOrder)) { + static_assert(kNumPastSignalSamples + kNumSubframeSamples == + sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]), + "lpc analysis window incorrect size"); + static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]), + "correlation weight incorrect size"); + + // TODO(turajs): Are we doing too much in the constructor? + float data[kDftSize]; + // Make FFT to initialize. + ip_[0] = 0; + WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_); + // TODO(turajs): Need to initialize high-pass filter. + + // Initialize iSAC components. + WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get()); + WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get()); +} + +VadAudioProc::~VadAudioProc() {} + +void VadAudioProc::ResetBuffer() { + memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess], + sizeof(audio_buffer_[0]) * kNumPastSignalSamples); + num_buffer_samples_ = kNumPastSignalSamples; +} + +int VadAudioProc::ExtractFeatures(const int16_t* frame, + size_t length, + AudioFeatures* features) { + features->num_frames = 0; + if (length != kNumSubframeSamples) { + return -1; + } + + // High-pass filter to remove the DC component and very low frequency content. + // We have experienced that this high-pass filtering improves voice/non-voiced + // classification. + if (high_pass_filter_->Filter(frame, kNumSubframeSamples, + &audio_buffer_[num_buffer_samples_]) != 0) { + return -1; + } + + num_buffer_samples_ += kNumSubframeSamples; + if (num_buffer_samples_ < kBufferLength) { + return 0; + } + RTC_DCHECK_EQ(num_buffer_samples_, kBufferLength); + features->num_frames = kNum10msSubframes; + features->silence = false; + + Rms(features->rms, kMaxNumFrames); + for (size_t i = 0; i < kNum10msSubframes; ++i) { + if (features->rms[i] < kSilenceRms) { + // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence. + // Bail out here instead. + features->silence = true; + ResetBuffer(); + return 0; + } + } + + PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz, + kMaxNumFrames); + FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames); + ResetBuffer(); + return 0; +} + +// Computes |kLpcOrder + 1| correlation coefficients. +void VadAudioProc::SubframeCorrelation(double* corr, + size_t length_corr, + size_t subframe_index) { + RTC_DCHECK_GE(length_corr, kLpcOrder + 1); + double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples]; + size_t buffer_index = subframe_index * kNumSubframeSamples; + + for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++) + windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n]; + + WebRtcIsac_AutoCorr(corr, windowed_audio, + kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder); +} + +// Compute `kNum10msSubframes` sets of LPC coefficients, one per 10 ms input. 
+// The analysis window is 15 ms long and it is centered on the first half of +// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the +// first half of each 10 ms subframe. +void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) { + RTC_DCHECK_GE(length_lpc, kNum10msSubframes * (kLpcOrder + 1)); + double corr[kLpcOrder + 1]; + double reflec_coeff[kLpcOrder]; + for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes; + i++, offset_lpc += kLpcOrder + 1) { + SubframeCorrelation(corr, kLpcOrder + 1, i); + corr[0] *= 1.0001; + // This makes Lev-Durb a bit more stable. + for (size_t k = 0; k < kLpcOrder + 1; k++) { + corr[k] *= kCorrWeight[k]; + } + WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder); + } +} + +// Fit a second order curve to these 3 points and find the location of the +// extremum. The points are inverted before curve fitting. +static float QuadraticInterpolation(float prev_val, + float curr_val, + float next_val) { + // Doing the interpolation in |1 / A(z)|^2. + float fractional_index = 0; + next_val = 1.0f / next_val; + prev_val = 1.0f / prev_val; + curr_val = 1.0f / curr_val; + + fractional_index = + -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val); + RTC_DCHECK_LT(fabs(fractional_index), 1); + return fractional_index; +} + +// 1 / A(z), where A(z) is defined by `lpc` is a model of the spectral envelope +// of the input signal. The local maximum of the spectral envelope corresponds +// with the local minimum of A(z). It saves complexity, as we save one +// inversion. Furthermore, we find the first local maximum of magnitude squared, +// to save on one square root. +void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, + size_t length_f_peak) { + RTC_DCHECK_GE(length_f_peak, kNum10msSubframes); + double lpc[kNum10msSubframes * (kLpcOrder + 1)]; + // For all sub-frames. + GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1)); + + const size_t kNumDftCoefficients = kDftSize / 2 + 1; + float data[kDftSize]; + + for (size_t i = 0; i < kNum10msSubframes; i++) { + // Convert to float with zero pad. + memset(data, 0, sizeof(data)); + for (size_t n = 0; n < kLpcOrder + 1; n++) { + data[n] = static_cast(lpc[i * (kLpcOrder + 1) + n]); + } + // Transform to frequency domain. + WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_); + + size_t index_peak = 0; + float prev_magn_sqr = data[0] * data[0]; + float curr_magn_sqr = data[2] * data[2] + data[3] * data[3]; + float next_magn_sqr; + bool found_peak = false; + for (size_t n = 2; n < kNumDftCoefficients - 1; n++) { + next_magn_sqr = + data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1]; + if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) { + found_peak = true; + index_peak = n - 1; + break; + } + prev_magn_sqr = curr_magn_sqr; + curr_magn_sqr = next_magn_sqr; + } + float fractional_index = 0; + if (!found_peak) { + // Checking if |kNumDftCoefficients - 1| is the local minimum. + next_magn_sqr = data[1] * data[1]; + if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) { + index_peak = kNumDftCoefficients - 1; + } + } else { + // A peak is found, do a simple quadratic interpolation to get a more + // accurate estimate of the peak location. + fractional_index = + QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr); + } + f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution; + } +} + +// Using iSAC functions to estimate pitch gains & lags. 
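+// The input is first split into lower and upper subbands; pitch lags and
+// gains are estimated on the lower band (at half the input sampling rate)
+// and then mapped to 10 ms subframes by GetSubframesPitchParameters().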
+void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
+                                 double* pitch_lags_hz,
+                                 size_t length) {
+  // TODO(turajs): This can be "imported" from iSAC, as can the next two
+  // constants.
+  RTC_DCHECK_GE(length, kNum10msSubframes);
+  const int kNumPitchSubframes = 4;
+  double gains[kNumPitchSubframes];
+  double lags[kNumPitchSubframes];
+
+  const int kNumSubbandFrameSamples = 240;
+  const int kNumLookaheadSamples = 24;
+
+  float lower[kNumSubbandFrameSamples];
+  float upper[kNumSubbandFrameSamples];
+  double lower_lookahead[kNumSubbandFrameSamples];
+  double upper_lookahead[kNumSubbandFrameSamples];
+  double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
+                                    kNumLookaheadSamples];
+
+  // Split the signal into lower and upper bands.
+  WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,
+                                 upper, lower_lookahead, upper_lookahead,
+                                 pre_filter_handle_.get());
+  WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
+                           pitch_analysis_handle_.get(), lags, gains);
+
+  // Lags are computed on the lower-band signal, whose sampling rate is half
+  // that of the input signal.
+  GetSubframesPitchParameters(
+      kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
+      &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
+}
+
+void VadAudioProc::Rms(double* rms, size_t length_rms) {
+  RTC_DCHECK_GE(length_rms, kNum10msSubframes);
+  size_t offset = kNumPastSignalSamples;
+  for (size_t i = 0; i < kNum10msSubframes; i++) {
+    rms[i] = 0;
+    for (size_t n = 0; n < kNumSubframeSamples; n++, offset++)
+      rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
+    rms[i] = sqrt(rms[i] / kNumSubframeSamples);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.h b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.h
new file mode 100644
index 0000000000..cbdd707129
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.h
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+
+#include "modules/audio_processing/vad/common.h"  // AudioFeatures, kSampleR...
+
+namespace webrtc {
+
+class PoleZeroFilter;
+
+class VadAudioProc {
+ public:
+  // Forward declare iSAC structs.
+  struct PitchAnalysisStruct;
+  struct PreFiltBankstr;
+
+  VadAudioProc();
+  ~VadAudioProc();
+
+  int ExtractFeatures(const int16_t* audio_frame,
+                      size_t length,
+                      AudioFeatures* audio_features);
+
+  static constexpr size_t kDftSize = 512;
+
+ private:
+  void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
+  void SubframeCorrelation(double* corr,
+                           size_t length_corr,
+                           size_t subframe_index);
+  void GetLpcPolynomials(double* lpc, size_t length_lpc);
+  void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
+  void Rms(double* rms, size_t length_rms);
+  void ResetBuffer();
+
+  // To compute the spectral peak we perform LPC analysis to get the spectral
+  // envelope. For every 30 ms we compute 3 spectral peaks, and therefore run
+  // 3 LPC analyses.
+  // LPC is computed over a 15 ms window. For every 10 ms sub-frame we need
+  // 5 ms of past signal to create the input of the LPC analysis.
+  static constexpr size_t kNumPastSignalSamples =
+      static_cast<size_t>(kSampleRateHz / 200);
+
+  // TODO(turajs): maybe define this at a higher level (e.g. as an enum) so
+  // that all the code recognizes it as "no-error."
+  static constexpr int kNoError = 0;
+
+  static constexpr size_t kNum10msSubframes = 3;
+  static constexpr size_t kNumSubframeSamples =
+      static_cast<size_t>(kSampleRateHz / 100);
+  // Samples in 30 ms @ given sampling rate.
+  static constexpr size_t kNumSamplesToProcess =
+      size_t{kNum10msSubframes} * kNumSubframeSamples;
+  static constexpr size_t kBufferLength =
+      size_t{kNumPastSignalSamples} + kNumSamplesToProcess;
+  static constexpr size_t kIpLength = kDftSize >> 1;
+  static constexpr size_t kWLength = kDftSize >> 1;
+  static constexpr size_t kLpcOrder = 16;
+
+  size_t ip_[kIpLength];
+  float w_fft_[kWLength];
+
+  // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
+  float audio_buffer_[kBufferLength];
+  size_t num_buffer_samples_;
+
+  double log_old_gain_;
+  double old_lag_;
+
+  std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
+  std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
+  std::unique_ptr<PoleZeroFilter> high_pass_filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_internal.h b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
new file mode 100644
index 0000000000..93589affe8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
@@ -0,0 +1,81 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
+
+#include <stddef.h>
+
+namespace webrtc {
+
+// These values should match the MATLAB counterparts for the unit tests to
+// pass.
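+// kCorrWeight[k] appears to be 0.985^k (a lag window applied to the
+// autocorrelation before Levinson-Durbin), and kLpcAnalWin appears to be a
+// 240-point sine window, sin(pi * n / 239).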
+static const double kCorrWeight[] = { + 1.000000, 0.985000, 0.970225, 0.955672, 0.941337, 0.927217, + 0.913308, 0.899609, 0.886115, 0.872823, 0.859730, 0.846834, + 0.834132, 0.821620, 0.809296, 0.797156, 0.785199}; + +static const double kLpcAnalWin[] = { + 0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639, + 0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883, + 0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547, + 0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438, + 0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222, + 0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713, + 0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164, + 0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546, + 0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810, + 0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148, + 0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233, + 0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442, + 0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069, + 0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512, + 0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447, + 0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979, + 0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773, + 0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158, + 0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215, + 0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840, + 0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778, + 0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639, + 0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889, + 0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814, + 0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465, + 0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574, + 0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451, + 0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858, + 0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862, + 0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664, + 0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416, + 0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008, + 0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853, + 0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642, + 0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093, + 0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687, + 0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387, + 0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358, + 0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670, + 0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000}; + +static const size_t kFilterOrder = 2; +static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, -1.949650f, + 0.974827f}; +static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, -1.971999f, + 0.972457f}; + +static_assert(kFilterOrder + 1 == + sizeof(kCoeffNumerator) / sizeof(kCoeffNumerator[0]), + "numerator 
coefficients incorrect size"); +static_assert(kFilterOrder + 1 == + sizeof(kCoeffDenominator) / sizeof(kCoeffDenominator[0]), + "denominator coefficients incorrect size"); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROCESSING_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc new file mode 100644 index 0000000000..0afed84c35 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// We don't test the value of pitch gain and lags as they are created by iSAC +// routines. However, interpolation of pitch-gain and lags is in a separate +// class and has its own unit-test. + +#include "modules/audio_processing/vad/vad_audio_proc.h" + +#include +#include + +#include + +#include "modules/audio_processing/vad/common.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { + +TEST(AudioProcessingTest, DISABLED_ComputingFirstSpectralPeak) { + VadAudioProc audioproc; + + std::string peak_file_name = + test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat"); + FILE* peak_file = fopen(peak_file_name.c_str(), "rb"); + ASSERT_TRUE(peak_file != NULL); + + std::string pcm_file_name = + test::ResourcePath("audio_processing/agc/agc_audio", "pcm"); + FILE* pcm_file = fopen(pcm_file_name.c_str(), "rb"); + ASSERT_TRUE(pcm_file != NULL); + + // Read 10 ms audio in each iteration. + const size_t kDataLength = kLength10Ms; + int16_t data[kDataLength] = {0}; + AudioFeatures features; + double sp[kMaxNumFrames]; + while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) { + audioproc.ExtractFeatures(data, kDataLength, &features); + if (features.num_frames > 0) { + ASSERT_LT(features.num_frames, kMaxNumFrames); + // Read reference values. + const size_t num_frames = features.num_frames; + ASSERT_EQ(num_frames, fread(sp, sizeof(sp[0]), num_frames, peak_file)); + for (size_t n = 0; n < features.num_frames; n++) + EXPECT_NEAR(features.spectral_peak[n], sp[n], 3); + } + } + + fclose(peak_file); + fclose(pcm_file); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc new file mode 100644 index 0000000000..31f14d7f64 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/vad/vad_circular_buffer.h" + +#include + +namespace webrtc { + +VadCircularBuffer::VadCircularBuffer(int buffer_size) + : buffer_(new double[buffer_size]), + is_full_(false), + index_(0), + buffer_size_(buffer_size), + sum_(0) {} + +VadCircularBuffer::~VadCircularBuffer() {} + +void VadCircularBuffer::Reset() { + is_full_ = false; + index_ = 0; + sum_ = 0; +} + +VadCircularBuffer* VadCircularBuffer::Create(int buffer_size) { + if (buffer_size <= 0) + return NULL; + return new VadCircularBuffer(buffer_size); +} + +double VadCircularBuffer::Oldest() const { + if (!is_full_) + return buffer_[0]; + else + return buffer_[index_]; +} + +double VadCircularBuffer::Mean() { + double m; + if (is_full_) { + m = sum_ / buffer_size_; + } else { + if (index_ > 0) + m = sum_ / index_; + else + m = 0; + } + return m; +} + +void VadCircularBuffer::Insert(double value) { + if (is_full_) { + sum_ -= buffer_[index_]; + } + sum_ += value; + buffer_[index_] = value; + index_++; + if (index_ >= buffer_size_) { + is_full_ = true; + index_ = 0; + } +} +int VadCircularBuffer::BufferLevel() { + if (is_full_) + return buffer_size_; + return index_; +} + +int VadCircularBuffer::Get(int index, double* value) const { + int err = ConvertToLinearIndex(&index); + if (err < 0) + return -1; + *value = buffer_[index]; + return 0; +} + +int VadCircularBuffer::Set(int index, double value) { + int err = ConvertToLinearIndex(&index); + if (err < 0) + return -1; + + sum_ -= buffer_[index]; + buffer_[index] = value; + sum_ += value; + return 0; +} + +int VadCircularBuffer::ConvertToLinearIndex(int* index) const { + if (*index < 0 || *index >= buffer_size_) + return -1; + + if (!is_full_ && *index >= index_) + return -1; + + *index = index_ - 1 - *index; + if (*index < 0) + *index += buffer_size_; + return 0; +} + +int VadCircularBuffer::RemoveTransient(int width_threshold, + double val_threshold) { + if (!is_full_ && index_ < width_threshold + 2) + return 0; + + int index_1 = 0; + int index_2 = width_threshold + 1; + double v = 0; + if (Get(index_1, &v) < 0) + return -1; + if (v < val_threshold) { + Set(index_1, 0); + int index; + for (index = index_2; index > index_1; index--) { + if (Get(index, &v) < 0) + return -1; + if (v < val_threshold) + break; + } + for (; index > index_1; index--) { + if (Set(index, 0.0) < 0) + return -1; + } + } + return 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.h b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.h new file mode 100644 index 0000000000..c1806f9e83 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_ + +#include + +namespace webrtc { + +// A circular buffer tailored to the need of this project. It stores last +// K samples of the input, and keeps track of the mean of the last samples. 
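+// The mean is maintained as a running sum: Insert() subtracts the
+// overwritten sample from the sum once the buffer is full, so Mean() is O(1).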
+// +// It is used in class "PitchBasedActivity" to keep track of posterior +// probabilities in the past few seconds. The posterior probabilities are used +// to recursively update prior probabilities. +class VadCircularBuffer { + public: + static VadCircularBuffer* Create(int buffer_size); + ~VadCircularBuffer(); + + // If buffer is wrapped around. + bool is_full() const { return is_full_; } + // Get the oldest entry in the buffer. + double Oldest() const; + // Insert new value into the buffer. + void Insert(double value); + // Reset buffer, forget the past, start fresh. + void Reset(); + + // The mean value of the elements in the buffer. The return value is zero if + // buffer is empty, i.e. no value is inserted. + double Mean(); + // Remove transients. If the values exceed `val_threshold` for a period + // shorter then or equal to `width_threshold`, then that period is considered + // transient and set to zero. + int RemoveTransient(int width_threshold, double val_threshold); + + private: + explicit VadCircularBuffer(int buffer_size); + // Get previous values. |index = 0| corresponds to the most recent + // insertion. |index = 1| is the one before the most recent insertion, and + // so on. + int Get(int index, double* value) const; + // Set a given position to `value`. `index` is interpreted as above. + int Set(int index, double value); + // Return the number of valid elements in the buffer. + int BufferLevel(); + + // Convert an index with the interpretation as get() method to the + // corresponding linear index. + int ConvertToLinearIndex(int* index) const; + + std::unique_ptr buffer_; + bool is_full_; + int index_; + int buffer_size_; + double sum_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc new file mode 100644 index 0000000000..efbd70d9d9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/vad/vad_circular_buffer.h"
+
+#include <stdio.h>
+
+#include <memory>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const int kWidthThreshold = 7;
+static const double kValThreshold = 1.0;
+static const int kLongBuffSize = 100;
+static const int kShortBuffSize = 10;
+
+static void InsertSequentially(int k, VadCircularBuffer* circular_buffer) {
+  double mean_val;
+  for (int n = 1; n <= k; n++) {
+    EXPECT_TRUE(!circular_buffer->is_full());
+    circular_buffer->Insert(n);
+    mean_val = circular_buffer->Mean();
+    EXPECT_EQ((n + 1.0) / 2., mean_val);
+  }
+}
+
+static void Insert(double value,
+                   int num_insertion,
+                   VadCircularBuffer* circular_buffer) {
+  for (int n = 0; n < num_insertion; n++)
+    circular_buffer->Insert(value);
+}
+
+static void InsertZeros(int num_zeros, VadCircularBuffer* circular_buffer) {
+  Insert(0.0, num_zeros, circular_buffer);
+}
+
+TEST(VadCircularBufferTest, GeneralTest) {
+  std::unique_ptr<VadCircularBuffer> circular_buffer(
+      VadCircularBuffer::Create(kShortBuffSize));
+  double mean_val;
+
+  // Mean should return zero if nothing is inserted.
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(0.0, mean_val);
+  InsertSequentially(kShortBuffSize, circular_buffer.get());
+
+  // Should be full.
+  EXPECT_TRUE(circular_buffer->is_full());
+  // Correct update after being full.
+  for (int n = 1; n < kShortBuffSize; n++) {
+    circular_buffer->Insert(n);
+    mean_val = circular_buffer->Mean();
+    EXPECT_DOUBLE_EQ((kShortBuffSize + 1.) / 2., mean_val);
+    EXPECT_TRUE(circular_buffer->is_full());
+  }
+
+  // Check reset. This should be like starting fresh.
+  circular_buffer->Reset();
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(0, mean_val);
+  InsertSequentially(kShortBuffSize, circular_buffer.get());
+  EXPECT_TRUE(circular_buffer->is_full());
+}
+
+TEST(VadCircularBufferTest, TransientsRemoval) {
+  std::unique_ptr<VadCircularBuffer> circular_buffer(
+      VadCircularBuffer::Create(kLongBuffSize));
+  // Let the first transient be in wrap-around.
+  InsertZeros(kLongBuffSize - kWidthThreshold / 2, circular_buffer.get());
+
+  double push_val = kValThreshold;
+  double mean_val;
+  for (int k = kWidthThreshold; k >= 1; k--) {
+    Insert(push_val, k, circular_buffer.get());
+    circular_buffer->Insert(0);
+    mean_val = circular_buffer->Mean();
+    EXPECT_DOUBLE_EQ(k * push_val / kLongBuffSize, mean_val);
+    circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold);
+    mean_val = circular_buffer->Mean();
+    EXPECT_DOUBLE_EQ(0, mean_val);
+  }
+}
+
+TEST(VadCircularBufferTest, TransientDetection) {
+  std::unique_ptr<VadCircularBuffer> circular_buffer(
+      VadCircularBuffer::Create(kLongBuffSize));
+  // Let the first transient be in wrap-around.
+  int num_insertion = kLongBuffSize - kWidthThreshold / 2;
+  InsertZeros(num_insertion, circular_buffer.get());
+
+  double push_val = 2;
+  // This is longer than a transient and shouldn't be removed.
+  int num_non_zero_elements = kWidthThreshold + 1;
+  Insert(push_val, num_non_zero_elements, circular_buffer.get());
+
+  double mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
+  circular_buffer->Insert(0);
+  EXPECT_EQ(0,
+            circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold));
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
+
+  // A transient right after a non-transient should be removed, and the mean
+  // should not change.
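+  // (Three insertions are fewer than kWidthThreshold, so the burst below
+  // qualifies as a transient.)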
+  num_insertion = 3;
+  Insert(push_val, num_insertion, circular_buffer.get());
+  circular_buffer->Insert(0);
+  EXPECT_EQ(0,
+            circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold));
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
+
+  // The last input is larger than the threshold; even though the sequence is
+  // short, it shouldn't be considered a transient.
+  Insert(push_val, num_insertion, circular_buffer.get());
+  num_non_zero_elements += num_insertion;
+  EXPECT_EQ(0,
+            circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold));
+  mean_val = circular_buffer->Mean();
+  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/vad_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/vad/vad_gn/moz.build
new file mode 100644
index 0000000000..0e76427c6e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/vad_gn/moz.build
@@ -0,0 +1,239 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config,  ###
+  ### DO NOT edit it by hand.                                       ###
+
+COMPILE_FLAGS["OS_INCLUDES"] = []
+AllowCompilerWarnings()
+
+DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1"
+DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True
+DEFINES["RTC_ENABLE_VP9"] = True
+DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0"
+DEFINES["WEBRTC_LIBRARY_IMPL"] = True
+DEFINES["WEBRTC_MOZILLA_BUILD"] = True
+DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0"
+DEFINES["WEBRTC_STRICT_FIELD_TRIALS"] = "0"
+
+FINAL_LIBRARY = "webrtc"
+
+
+LOCAL_INCLUDES += [
+    "!/ipc/ipdl/_ipdlheaders",
+    "!/third_party/libwebrtc/gen",
+    "/ipc/chromium/src",
+    "/third_party/libwebrtc/",
+    "/third_party/libwebrtc/third_party/abseil-cpp/",
+    "/tools/profiler/public"
+]
+
+UNIFIED_SOURCES += [
+    "/third_party/libwebrtc/modules/audio_processing/vad/gmm.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/pitch_internal.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/pole_zero_filter.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/standalone_vad.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/vad_audio_proc.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/vad_circular_buffer.cc",
+    "/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc"
+]
+
+if not CONFIG["MOZ_DEBUG"]:
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0"
+    DEFINES["NDEBUG"] = True
+    DEFINES["NVALGRIND"] = True
+
+if CONFIG["MOZ_DEBUG"] == "1":
+
+    DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1"
+
+if CONFIG["OS_TARGET"] == "Android":
+
+    DEFINES["ANDROID"] = True
+    DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1"
+    DEFINES["HAVE_SYS_UIO_H"] = True
+    DEFINES["WEBRTC_ANDROID"] = True
+    DEFINES["WEBRTC_ANDROID_OPENSLES"] = True
+    DEFINES["WEBRTC_LINUX"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_GNU_SOURCE"] = True
+    DEFINES["__STDC_CONSTANT_MACROS"] = True
+    DEFINES["__STDC_FORMAT_MACROS"] = True
+
+    OS_LIBS += [
+        "log"
+    ]
+
+if CONFIG["OS_TARGET"] == "Darwin":
+
+    DEFINES["WEBRTC_MAC"] = True
+    DEFINES["WEBRTC_POSIX"] = True
+    DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True
DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "mips32": + + DEFINES["MIPS32_LE"] = True + DEFINES["MIPS_FPU_LE"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["CPU_ARCH"] == "x86_64": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and 
CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_GNU_SOURCE"] = True + +Library("vad_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc new file mode 100644 index 0000000000..02023d6a72 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/voice_activity_detector.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +const size_t kNumChannels = 1; + +const double kDefaultVoiceValue = 1.0; +const double kNeutralProbability = 0.5; +const double kLowProbability = 0.01; + +} // namespace + +VoiceActivityDetector::VoiceActivityDetector() + : last_voice_probability_(kDefaultVoiceValue), + standalone_vad_(StandaloneVad::Create()) {} + +VoiceActivityDetector::~VoiceActivityDetector() = default; + +// Because ISAC has a different chunk length, it updates +// `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data. +// Otherwise it clears them. +void VoiceActivityDetector::ProcessChunk(const int16_t* audio, + size_t length, + int sample_rate_hz) { + RTC_DCHECK_EQ(length, sample_rate_hz / 100); + // TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio. + // Resample to the required rate. + const int16_t* resampled_ptr = audio; + if (sample_rate_hz != kSampleRateHz) { + RTC_CHECK_EQ( + resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), + 0); + resampler_.Push(audio, length, resampled_, kLength10Ms, length); + resampled_ptr = resampled_; + } + RTC_DCHECK_EQ(length, kLength10Ms); + + // Each chunk needs to be passed into `standalone_vad_`, because internally it + // buffers the audio and processes it all at once when GetActivity() is + // called. + RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); + + audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); + + chunkwise_voice_probabilities_.resize(features_.num_frames); + chunkwise_rms_.resize(features_.num_frames); + std::copy(features_.rms, features_.rms + chunkwise_rms_.size(), + chunkwise_rms_.begin()); + if (features_.num_frames > 0) { + if (features_.silence) { + // The other features are invalid, so set the voice probabilities to an + // arbitrary low value. 
+      std::fill(chunkwise_voice_probabilities_.begin(),
+                chunkwise_voice_probabilities_.end(), kLowProbability);
+    } else {
+      std::fill(chunkwise_voice_probabilities_.begin(),
+                chunkwise_voice_probabilities_.end(), kNeutralProbability);
+      RTC_CHECK_GE(
+          standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
+                                       chunkwise_voice_probabilities_.size()),
+          0);
+      RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
+                       features_, &chunkwise_voice_probabilities_[0]),
+                   0);
+    }
+    last_voice_probability_ = chunkwise_voice_probabilities_.back();
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.h b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.h
new file mode 100644
index 0000000000..92b9a8c208
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector.h
@@ -0,0 +1,74 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "common_audio/resampler/include/resampler.h"
+#include "modules/audio_processing/vad/common.h"
+#include "modules/audio_processing/vad/pitch_based_vad.h"
+#include "modules/audio_processing/vad/standalone_vad.h"
+#include "modules/audio_processing/vad/vad_audio_proc.h"
+
+namespace webrtc {
+
+// A Voice Activity Detector (VAD) that combines the voice probabilities from
+// the StandaloneVad and the PitchBasedVad to get a more robust estimate.
+class VoiceActivityDetector {
+ public:
+  VoiceActivityDetector();
+  ~VoiceActivityDetector();
+
+  // Processes each audio chunk and estimates the voice probability.
+  // TODO(bugs.webrtc.org/7494): Switch to rtc::ArrayView and remove
+  // `sample_rate_hz`.
+  void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
+
+  // Returns a vector of voice probabilities for each chunk. It can be empty
+  // for some chunks, but it then catches up by returning multiple values at
+  // once.
+  const std::vector<double>& chunkwise_voice_probabilities() const {
+    return chunkwise_voice_probabilities_;
+  }
+
+  // Returns a vector of RMS values for each chunk. It has the same length as
+  // chunkwise_voice_probabilities().
+  const std::vector<double>& chunkwise_rms() const { return chunkwise_rms_; }
+
+  // Returns the most recent voice probability, regardless of the internal
+  // implementation, although it lags the input by a few chunks.
+  float last_voice_probability() const { return last_voice_probability_; }
+
+ private:
+  // TODO(aluebs): Change these to float.
+  std::vector<double> chunkwise_voice_probabilities_;
+  std::vector<double> chunkwise_rms_;
+
+  float last_voice_probability_;
+
+  Resampler resampler_;
+  VadAudioProc audio_processing_;
+
+  std::unique_ptr<StandaloneVad> standalone_vad_;
+  PitchBasedVad pitch_based_vad_;
+
+  int16_t resampled_[kLength10Ms];
+  AudioFeatures features_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc
new file mode 100644
index 0000000000..80f21c8db0
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc
@@ -0,0 +1,168 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+namespace webrtc {
+namespace {
+
+const int kStartTimeSec = 16;
+const float kMeanSpeechProbability = 0.3f;
+const float kMaxNoiseProbability = 0.1f;
+const size_t kNumChunks = 300u;
+const size_t kNumChunksPerIsacBlock = 3;
+
+void GenerateNoise(std::vector<int16_t>* data) {
+  for (size_t i = 0; i < data->size(); ++i) {
+    // std::rand returns values between 0 and RAND_MAX; the implicit
+    // conversion to int16_t wraps, which is fine for generating noise.
+    (*data)[i] = std::rand();
+  }
+}
+
+}  // namespace
+
+TEST(VoiceActivityDetectorTest, ConstructorSetsDefaultValues) {
+  const float kDefaultVoiceValue = 1.f;
+
+  VoiceActivityDetector vad;
+
+  std::vector<double> p = vad.chunkwise_voice_probabilities();
+  std::vector<double> rms = vad.chunkwise_rms();
+
+  EXPECT_EQ(p.size(), 0u);
+  EXPECT_EQ(rms.size(), 0u);
+
+  EXPECT_FLOAT_EQ(vad.last_voice_probability(), kDefaultVoiceValue);
+}
+
+TEST(VoiceActivityDetectorTest, Speech16kHzHasHighVoiceProbabilities) {
+  const int kSampleRateHz = 16000;
+  const int kLength10Ms = kSampleRateHz / 100;
+
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(kLength10Ms);
+  float mean_probability = 0.f;
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/transient/audio16kHz", "pcm")
+                .c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != nullptr);
+  // The silences in the file are skipped to get a more robust voice
+  // probability for speech.
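+  // The fseek below jumps kStartTimeSec seconds into the recording.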
+  ASSERT_EQ(fseek(pcm_file, kStartTimeSec * kSampleRateHz * sizeof(data[0]),
+                  SEEK_SET),
+            0);
+
+  size_t num_chunks = 0;
+  while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) ==
+         data.size()) {
+    vad.ProcessChunk(&data[0], data.size(), kSampleRateHz);
+
+    mean_probability += vad.last_voice_probability();
+
+    ++num_chunks;
+  }
+
+  mean_probability /= num_chunks;
+
+  EXPECT_GT(mean_probability, kMeanSpeechProbability);
+}
+
+TEST(VoiceActivityDetectorTest, Speech32kHzHasHighVoiceProbabilities) {
+  const int kSampleRateHz = 32000;
+  const int kLength10Ms = kSampleRateHz / 100;
+
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(kLength10Ms);
+  float mean_probability = 0.f;
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/transient/audio32kHz", "pcm")
+                .c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != nullptr);
+  // The silences in the file are skipped to get a more robust voice
+  // probability for speech.
+  ASSERT_EQ(fseek(pcm_file, kStartTimeSec * kSampleRateHz * sizeof(data[0]),
+                  SEEK_SET),
+            0);
+
+  size_t num_chunks = 0;
+  while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) ==
+         data.size()) {
+    vad.ProcessChunk(&data[0], data.size(), kSampleRateHz);
+
+    mean_probability += vad.last_voice_probability();
+
+    ++num_chunks;
+  }
+
+  mean_probability /= num_chunks;
+
+  EXPECT_GT(mean_probability, kMeanSpeechProbability);
+}
+
+TEST(VoiceActivityDetectorTest, Noise16kHzHasLowVoiceProbabilities) {
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(kLength10Ms);
+  float max_probability = 0.f;
+
+  std::srand(42);
+
+  for (size_t i = 0; i < kNumChunks; ++i) {
+    GenerateNoise(&data);
+
+    vad.ProcessChunk(&data[0], data.size(), kSampleRateHz);
+
+    // Before the `vad` has enough data to process an ISAC block it returns
+    // the default value, 1.f, which would ruin the `max_probability` value.
+    if (i > kNumChunksPerIsacBlock) {
+      max_probability =
+          std::max(max_probability, vad.last_voice_probability());
+    }
+  }
+
+  EXPECT_LT(max_probability, kMaxNoiseProbability);
+}
+
+TEST(VoiceActivityDetectorTest, Noise32kHzHasLowVoiceProbabilities) {
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(2 * kLength10Ms);
+  float max_probability = 0.f;
+
+  std::srand(42);
+
+  for (size_t i = 0; i < kNumChunks; ++i) {
+    GenerateNoise(&data);
+
+    vad.ProcessChunk(&data[0], data.size(), 2 * kSampleRateHz);
+
+    // Before the `vad` has enough data to process an ISAC block it returns
+    // the default value, 1.f, which would ruin the `max_probability` value.
+    if (i > kNumChunksPerIsacBlock) {
+      max_probability =
+          std::max(max_probability, vad.last_voice_probability());
+    }
+  }
+
+  EXPECT_LT(max_probability, kMaxNoiseProbability);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/voice_gmm_tables.h b/third_party/libwebrtc/modules/audio_processing/vad/voice_gmm_tables.h
new file mode 100644
index 0000000000..ef4ad7e21e
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/voice_gmm_tables.h
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// GMM tables for active segments. Generated by MakeGmmTables.m.
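+//
+// Each of the kVoiceGmmNumMixtures mixtures is described by an inverse
+// covariance matrix, a mean vector and a weight (stored in the log domain).
+// The tables are consumed by the pitch-based VAD (see pitch_based_vad.cc).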
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
+
+static const int kVoiceGmmNumMixtures = 12;
+static const int kVoiceGmmDim = 3;
+
+static const double
+    kVoiceGmmCovarInverse[kVoiceGmmNumMixtures][kVoiceGmmDim][kVoiceGmmDim] = {
+        {{1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03},
+         {-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04},
+         {4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}},
+        {{6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03},
+         {-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05},
+         {-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}},
+        {{9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03},
+         {-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05},
+         {-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}},
+        {{3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02},
+         {-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05},
+         {-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}},
+        {{1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02},
+         {-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05},
+         {-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}},
+        {{1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02},
+         {-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06},
+         {-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}},
+        {{8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02},
+         {-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06},
+         {-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}},
+        {{2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04},
+         {-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06},
+         {7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}},
+        {{3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02},
+         {1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05},
+         {-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}},
+        {{6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04},
+         {-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06},
+         {-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}},
+        {{2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03},
+         {-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05},
+         {-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}},
+        {{1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02},
+         {-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05},
+         {-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}};
+
+static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = {
+    {-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02},
+    {-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02},
+    {-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02},
+    {-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02},
+    {-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02},
+    {-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02},
+    {-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02},
+    {-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02},
+    {-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02},
+    {-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02},
+    {-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02},
+    {-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
+
+static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
+    -1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01,
+    -1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01,
+    -1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01,
+    -1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00};
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
--
cgit v1.2.3
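
For orientation, a minimal usage sketch of the classes added by this patch (not part of the patch itself): 10 ms chunks are fed to VoiceActivityDetector, and the per-chunk probabilities are smoothed with VadCircularBuffer. The buffer size and the 0.5 decision threshold are illustrative assumptions, not values taken from the sources above.

#include <cstdint>
#include <vector>

#include "modules/audio_processing/vad/vad_circular_buffer.h"
#include "modules/audio_processing/vad/voice_activity_detector.h"

// Feeds one 10 ms chunk of 16 kHz mono audio into `vad`, accumulates the
// per-chunk probabilities in `history`, and thresholds the running mean.
bool IsVoiceLikely(const std::vector<int16_t>& chunk_10ms,
                   webrtc::VoiceActivityDetector* vad,
                   webrtc::VadCircularBuffer* history) {
  // ProcessChunk expects exactly 10 ms of audio at the given sample rate.
  vad->ProcessChunk(chunk_10ms.data(), chunk_10ms.size(), 16000);
  // Probabilities arrive in batches once enough audio has been buffered
  // internally; a single call may yield zero or several values.
  for (double p : vad->chunkwise_voice_probabilities()) {
    history->Insert(p);
  }
  return history->Mean() > 0.5;  // Assumed threshold, for illustration only.
}

// Typical setup: one detector plus a 100-chunk (~1 s) history buffer.
//   webrtc::VoiceActivityDetector vad;
//   std::unique_ptr<webrtc::VadCircularBuffer> history(
//       webrtc::VadCircularBuffer::Create(100));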